diff --git a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl index 86e2dff..bd86cf4 100644 Binary files a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl and b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl differ diff --git a/aisenzhecode/沥青/沥青定量每日执行.py b/aisenzhecode/沥青/沥青定量每日执行.py index 136a942..c48278d 100644 --- a/aisenzhecode/沥青/沥青定量每日执行.py +++ b/aisenzhecode/沥青/沥青定量每日执行.py @@ -1,3 +1,30 @@ +from statsmodels.tools.eval_measures import mse, rmse +from pandas import Series, DataFrame +import cufflinks as cf +from sklearn.metrics import r2_score +from sklearn.model_selection import train_test_split +from sklearn.metrics import mean_squared_error +import pickle +import warnings +from sklearn.model_selection import GridSearchCV +from sklearn.metrics import mean_absolute_error +from xgboost import plot_importance, plot_tree +import xgboost as xgb +import plotly.graph_objects as go +import plotly.express as px +from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator +import statsmodels.api as sm +from xgboost import XGBRegressor +from sklearn.linear_model import Lasso +import sklearn.datasets as datasets +from sklearn import preprocessing +from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot +from plotly import __version__ +import random +import seaborn as sn +import matplotlib.pyplot as plt +import numpy as np +import matplotlib import requests import json from datetime import datetime, timedelta @@ -38,63 +65,28 @@ read_file_path_name = "沥青数据项.xlsx" one_cols = [] two_cols = [] -import pandas as pd -import numpy as np -import matplotlib.pyplot as plt -import seaborn as sn -import random -import time -from plotly import __version__ -from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot +# 导入机器学习算法模型 -from sklearn import preprocessing - -from pandas import Series,DataFrame - -import matplotlib.pyplot as plt - -import sklearn.datasets as datasets - -#导入机器学习算法模型 -from sklearn.linear_model import Lasso -from xgboost import XGBRegressor - -import statsmodels.api as sm # from keras.preprocessing.sequence import TimeseriesGenerator -from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator -import plotly.express as px -import plotly.graph_objects as go -import xgboost as xgb -from xgboost import plot_importance, plot_tree -from sklearn.metrics import mean_absolute_error -from statsmodels.tools.eval_measures import mse,rmse -from sklearn.model_selection import GridSearchCV -from xgboost import XGBRegressor -import warnings -import pickle +# 切割训练数据和样本数据 -from sklearn.metrics import mean_squared_error - -#切割训练数据和样本数据 -from sklearn.model_selection import train_test_split - -#用于模型评分 -from sklearn.metrics import r2_score +# 用于模型评分 le = preprocessing.LabelEncoder() # print(__version__) # requires version >= 1.9.0 -import cufflinks as cf cf.go_offline() random.seed(100) # 数据获取 + + def get_head_auth(): login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) text = json.loads(login_res.text) @@ -106,7 +98,7 @@ def get_head_auth(): return None -def get_data_value(token, dataItemNoList,date=''): +def get_data_value(token, dataItemNoList, date=''): search_data = { "data": { @@ -117,7 +109,8 @@ def get_data_value(token, dataItemNoList,date=''): "funcOperation": "查询" } headers = {"Authorization": token} - search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_res = requests.post( + url=search_url, headers=headers, json=search_data, timeout=(3, 5)) search_value = 
json.loads(search_res.text)["data"] if search_value: return search_value @@ -140,7 +133,7 @@ def getNow(date='', offset=0): tuple: (紧凑日期字符串, 标准日期字符串) """ # 日期解析逻辑 - from datetime import datetime,timedelta + from datetime import datetime, timedelta if isinstance(date, datetime): now = date else: @@ -158,7 +151,7 @@ def getNow(date='', offset=0): # 应用日期偏移 now = now - timedelta(days=offset) - + # 统一格式化输出 date_str = now.strftime("%Y-%m-%d") compact_date = date_str.replace("-", "") @@ -166,7 +159,8 @@ def getNow(date='', offset=0): def get_head_push_auth(): - login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) + login_res = requests.post( + url=login_push_url, json=login_push_data, timeout=(3, 5)) text = json.loads(login_res.text) if text["status"]: token = text["data"]["accessToken"] @@ -175,7 +169,8 @@ def get_head_push_auth(): print("获取认证失败") return None -def upload_data_to_system(token_push,date): + +def upload_data_to_system(token_push, date): data = { "funcModule": "数据表信息列表", "funcOperation": "新增", @@ -189,37 +184,37 @@ def upload_data_to_system(token_push,date): ] } headers = {"Authorization": token_push} - res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5)) + res = requests.post(url=upload_url, headers=headers, + json=data, timeout=(3, 5)) print(res.text) + def forecast_price(): df_test = pd.read_excel('沥青数据项.xlsx') - df_test.drop([0],inplace=True) - df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量', - '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价', - '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存', - '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存', - '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量', - '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量', - '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价', - '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存', - '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存', - '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float') + df_test.drop([0], inplace=True) + + # 优化的格式转换逻辑:除了'日期'列以外,所有列都转为float + for col in df_test.columns: + if col != '日期': + df_test[col] = pd.to_numeric(df_test[col], errors='coerce') # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True) - df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True) + df_test['日期'] = pd.to_datetime( + df_test['日期'], format='%Y-%m-%d', infer_datetime_format=True) - #查看每个特征缺失值数量 - MisVal_Check=df_test.isnull().sum().sort_values(ascending=False) - #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1 - df_MisVal_Check = pd.DataFrame(MisVal_Check,)# - df_MisVal_Check_1=df_MisVal_Check.reset_index() - df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] - df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test) - df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1) + # 查看每个特征缺失值数量 + MisVal_Check = df_test.isnull().sum().sort_values(ascending=False) + # 去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1 + df_MisVal_Check = pd.DataFrame(MisVal_Check,) + df_MisVal_Check_1 = df_MisVal_Check.reset_index() + df_MisVal_Check_1.columns = ['Variable_Name', 'Missing_Number'] + df_MisVal_Check_1['Missing_Number'] = df_MisVal_Check_1['Missing_Number'] / \ + len(df_test) + df_test_1 = df_test.drop( + df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number'] > 0.4].Variable_Name, axis=1) - 
#将缺失值补为前一个或者后一个数值 - df_test_1=df_test_1.fillna(df_test.ffill()) - df_test_1=df_test_1.fillna(df_test_1.bfill()) + # 将缺失值补为前一个或者后一个数值 + df_test_1 = df_test_1.fillna(df_test.ffill()) + df_test_1 = df_test_1.fillna(df_test_1.bfill()) # 选择用于模型训练的列名称 col_for_training = df_test_1.columns @@ -229,71 +224,72 @@ def forecast_price(): df_test_1_Day = df_test_1.tail(1) # 移除不需要的列 df_test_1_Day.index = df_test_1_Day["日期"] - df_test_1_Day = df_test_1_Day.drop(["日期"], axis= 1) - df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1) - df_test_1_Day=df_test_1_Day.dropna() + df_test_1_Day = df_test_1_Day.drop(["日期"], axis=1) + df_test_1_Day = df_test_1_Day.drop('京博指导价', axis=1) + df_test_1_Day = df_test_1_Day.dropna() # df_test_1_Day - #预测今日价格,显示至小数点后两位 - Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day) + # 预测今日价格,显示至小数点后两位 + Ypredict_Today = Best_model_DalyLGPrice.predict(df_test_1_Day) - df_test_1_Day['日度预测价格']=Ypredict_Today + df_test_1_Day['日度预测价格'] = Ypredict_Today print(df_test_1_Day['日度预测价格']) a = df_test_1_Day['日度预测价格'] a = a[0] a = float(a) - a = round(a,2) + a = round(a, 2) return a + + def optimize_Model(): from sklearn.model_selection import train_test_split - from sklearn.impute import SimpleImputer + from sklearn.impute import SimpleImputer from sklearn.preprocessing import OrdinalEncoder from sklearn.feature_selection import SelectFromModel from sklearn.metrics import mean_squared_error, r2_score - pd.set_option('display.max_rows',40) - pd.set_option('display.max_columns',40) + pd.set_option('display.max_rows', 40) + pd.set_option('display.max_columns', 40) df_test = pd.read_excel('沥青数据项.xlsx') - df_test.drop([0],inplace=True) - df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量', - '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价', - '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存', - '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存', - '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量', - '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价', - '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存', - '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存', - '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float') + df_test.drop([0], inplace=True) + + # 优化的格式转换逻辑:除了'日期'列以外,所有列都转为float + for col in df_test.columns: + if col != '日期': + df_test[col] = pd.to_numeric(df_test[col], errors='coerce') # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python') # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True) - df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True) + df_test['日期'] = pd.to_datetime( + df_test['日期'], format='%Y-%m-%d', infer_datetime_format=True) # df_test.tail(3) - MisVal_Check=df_test.isnull().sum().sort_values(ascending=False) - #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1 - df_MisVal_Check = pd.DataFrame(MisVal_Check,)# - df_MisVal_Check_1=df_MisVal_Check.reset_index() - df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] - df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test) - df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1) - #将缺失值补为前一个或者后一个数值 - df_test_1=df_test_1.fillna(df_test.ffill()) - df_test_1=df_test_1.fillna(df_test_1.bfill()) + MisVal_Check = df_test.isnull().sum().sort_values(ascending=False) + # 
去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1 + df_MisVal_Check = pd.DataFrame(MisVal_Check,) + df_MisVal_Check_1 = df_MisVal_Check.reset_index() + df_MisVal_Check_1.columns = ['Variable_Name', 'Missing_Number'] + df_MisVal_Check_1['Missing_Number'] = df_MisVal_Check_1['Missing_Number'] / \ + len(df_test) + df_test_1 = df_test.drop( + df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number'] > 0.4].Variable_Name, axis=1) + # 将缺失值补为前一个或者后一个数值 + df_test_1 = df_test_1.fillna(df_test.ffill()) + df_test_1 = df_test_1.fillna(df_test_1.bfill()) df_test_1["日期"] = pd.to_datetime(df_test_1["日期"]) df_test_1.index = df_test_1["日期"] - df_test_1 = df_test_1.drop(["日期"], axis= 1) - dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float) + df_test_1 = df_test_1.drop(["日期"], axis=1) + dataset1 = df_test_1.drop('京博指导价', axis=1) # .astype(float) - y=df_test_1['京博指导价'] + y = df_test_1['京博指导价'] - x=dataset1 + x = dataset1 train = x target = y - #切割数据样本集合测试集 - X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0) - - + # 切割数据样本集合测试集 + X_train, x_test, y_train, y_true = train_test_split( + train, target, test_size=0.2, random_state=0) + from sklearn.linear_model import Lasso from xgboost import XGBRegressor @@ -307,7 +303,7 @@ def optimize_Model(): import xgboost as xgb from xgboost import plot_importance, plot_tree from sklearn.metrics import mean_absolute_error - from statsmodels.tools.eval_measures import mse,rmse + from statsmodels.tools.eval_measures import mse, rmse from sklearn.model_selection import GridSearchCV from xgboost import XGBRegressor import warnings @@ -315,70 +311,93 @@ def optimize_Model(): from sklearn.metrics import mean_squared_error - #切割训练数据和样本数据 + # 切割训练数据和样本数据 from sklearn.model_selection import train_test_split - #用于模型评分 + # 用于模型评分 from sklearn.metrics import r2_score - #模型缩写 - Lasso = Lasso(random_state=0) + # 模型缩写 + Lasso = Lasso(random_state=0) XGBR = XGBRegressor(random_state=0) - Lasso.fit(X_train,y_train) - XGBR.fit(X_train,y_train) + Lasso.fit(X_train, y_train) + XGBR.fit(X_train, y_train) y_pre_Lasso = Lasso.predict(x_test) y_pre_XGBR = XGBR.predict(x_test) - #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R² - Lasso_score = r2_score(y_true,y_pre_Lasso) - XGBR_score=r2_score(y_true,y_pre_XGBR) + # 计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R² + Lasso_score = r2_score(y_true, y_pre_Lasso) + XGBR_score = r2_score(y_true, y_pre_XGBR) - #计算Lasso、XGBR的MSE和RMSE - Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso) - XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR) + # 计算Lasso、XGBR的MSE和RMSE + Lasso_MSE = mean_squared_error(y_true, y_pre_Lasso) + XGBR_MSE = mean_squared_error(y_true, y_pre_XGBR) - Lasso_RMSE=np.sqrt(Lasso_MSE) - XGBR_RMSE=np.sqrt(XGBR_MSE) + Lasso_RMSE = np.sqrt(Lasso_MSE) + XGBR_RMSE = np.sqrt(XGBR_MSE) model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score], ['XgBoost', XGBR_RMSE, XGBR_score]], - columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score']) - model_results1=model_results.set_index('模型(Model)') + columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score']) + model_results1 = model_results.set_index('模型(Model)') - def plot_feature_importance(importance,names,model_type): + def plot_feature_importance(importance, names, model_type): feature_importance = np.array(importance) feature_names = np.array(names) - data={'feature_names':feature_names,'feature_importance':feature_importance} + data = {'feature_names': feature_names, + 'feature_importance': feature_importance} fi_df = 
pd.DataFrame(data) - fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True) + fi_df.sort_values(by=['feature_importance'], + ascending=False, inplace=True) - plt.figure(figsize=(10,8)) + plt.figure(figsize=(10, 8)) sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names']) plt.title(model_type + " "+'FEATURE IMPORTANCE') plt.xlabel('FEATURE IMPORTANCE') plt.ylabel('FEATURE NAMES') + plt.savefig(f'{model_type}-沥青定量特征重要性.png') + from pylab import mpl mpl.rcParams['font.sans-serif'] = ['SimHei'] + + # from pylab import mpl + # mpl.rcParams['font.sans-serif'] = ['SimHei'] + + # 显示XGBoost模型的各特征重要性 + # 参考: https://www.analyseup.com/learn-python-for-data-science/python-random-forest-feature-importance-plot.html + # matplotlib.rc("font", family='MicroSoft YaHei', weight="bold") + # plot_feature_importance(XGBR.feature_importances_, + # X_train.columns, 'XGBoost') + + from pylab import mpl + + # mpl.rcParams['font.sans-serif'] = ['SimHei'] + + # 显示Lasso模型的各特征重要性 + # 参考: https://www.analyseup.com/learn-python-for-data-science/python-random-forest-feature-importance-plot.html + # matplotlib.rc("font", family='MicroSoft YaHei', weight="bold") + # plot_feature_importance(Lasso.coef_, X_train.columns, 'Lasso') + from xgboost import XGBRegressor from sklearn.model_selection import GridSearchCV estimator = XGBRegressor(random_state=0, - nthread=4, - seed=0 - ) + nthread=4, + seed=0 + ) parameters = { - 'max_depth': range (2, 11, 2), # 树的最大深度 - 'n_estimators': range (50, 101, 10), # 迭代次数 + 'max_depth': range(2, 11, 2), # 树的最大深度 + 'n_estimators': range(50, 101, 10), # 迭代次数 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1] } grid_search_XGB = GridSearchCV( estimator=estimator, param_grid=parameters, - # n_jobs = 10, - cv = 3, + # n_jobs = 10, + cv=3, verbose=True ) @@ -390,23 +409,24 @@ def optimize_Model(): print("\t%s: %r" % (param_name, best_parameters[param_name])) y_pred = grid_search_XGB.predict(x_test) - op_XGBR_score = r2_score(y_true,y_pred) - op_XGBR_MSE= mean_squared_error(y_true, y_pred) - op_XGBR_RMSE= np.sqrt(op_XGBR_MSE) + op_XGBR_score = r2_score(y_true, y_pred) + op_XGBR_MSE = mean_squared_error(y_true, y_pred) + op_XGBR_RMSE = np.sqrt(op_XGBR_MSE) model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]], - columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score']) - model_results2=model_results2.set_index('模型(Model)') + columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score']) + model_results2 = model_results2.set_index('模型(Model)') # results = model_results1.append(model_results2, ignore_index = False) - results = pd.concat([model_results1,model_results2],ignore_index=True) + results = pd.concat([model_results1, model_results2], ignore_index=True) results import pickle - Pkl_Filename = "日度价格预测_最佳模型.pkl" + Pkl_Filename = "日度价格预测_最佳模型.pkl" + + with open(Pkl_Filename, 'wb') as file: + pickle.dump(grid_search_XGB, file) - with open(Pkl_Filename, 'wb') as file: - pickle.dump(grid_search_XGB, file) def read_xls_data(): """获取特征项ID""" @@ -417,6 +437,7 @@ def read_xls_data(): one_cols = df.iloc[1].tolist()[1:] print(f'获取到的数据项ID{one_cols}') + def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd): search_data = { @@ -430,7 +451,8 @@ def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEn } headers = {"Authorization": token} - search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5)) + search_res = requests.post( + url=url, headers=headers, json=search_data, timeout=(3, 5)) 
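# --- Editor's illustrative sketch (not part of the patch) ---
# The POST above returns a JSON envelope that the next context line unwraps
# via json.loads(search_res.text)["data"]. The records inside "data" carry the
# dataDate / dataItemNo / dataValue fields that the save functions below rely
# on; beyond those names (taken from this file's own usage), the exact envelope
# shape is an assumption. A defensive unwrap could look like:
#
#     try:
#         payload = search_res.json()          # equivalent to json.loads(search_res.text)
#     except ValueError:                       # response body was not valid JSON
#         return None
#     records = payload.get("data") or []      # [] when "data" is missing or empty
#     for rec in records:                      # field names assumed from usage below
#         print(rec["dataDate"], rec["dataItemNo"], rec["dataValue"])
#
# --- end sketch ---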
search_value = json.loads(search_res.text)["data"] if search_value: return search_value @@ -438,8 +460,8 @@ def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEn return None -def save_queryDataListItemNos_xls(data_df,dataItemNoList): - from datetime import datetime,timedelta +def save_queryDataListItemNos_xls(data_df, dataItemNoList): + from datetime import datetime, timedelta current_year_month = datetime.now().strftime('%Y-%m') grouped = data_df.groupby("dataDate") @@ -449,11 +471,11 @@ def save_queryDataListItemNos_xls(data_df,dataItemNoList): # 创建新工作簿 new_workbook = load_workbook('沥青数据项.xlsx') - + for sheetname in workbook.sheetnames: sheet = workbook[sheetname] new_sheet = new_workbook[sheetname] - + current_year_month_row = 0 # 查找当前月份数据起始行 for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1): @@ -467,13 +489,462 @@ def save_queryDataListItemNos_xls(data_df,dataItemNoList): new_sheet.cell(row=row_idx, column=1, value=date) for j, dataItemNo in enumerate(dataItemNoList, start=2): if group[group["dataItemNo"] == dataItemNo]["dataValue"].values: - new_sheet.cell(row=row_idx, column=j, - value=group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0]) + new_sheet.cell(row=row_idx, column=j, + value=group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0]) # 保存修改后的xlsx文件 new_workbook.save("沥青数据项.xlsx") +def save_recent_two_months_data_xls(data_df, dataItemNoList, filename='沥青数据项.xlsx', dateEnd=None): + """ + 只更新近两个月的数据,其他历史数据保持不变 + 从结束日期往回推算两个月,只更新这个时间范围内的数据行 + 保留源文件的前两行(中文列名+ID行)和两个月之前的所有历史数据 + 参数: + data_df (DataFrame): 包含 dataDate, dataItemNo, dataValue 的数据 + dataItemNoList (list): 数据项编号列表 + filename (str): 保存的Excel文件名 + dateEnd (str): 结束日期,YYYYMMDD格式,默认为当前日期 + """ + try: + from datetime import datetime, timedelta + + print(f'开始更新近两个月数据到 {filename}') + + # 处理结束日期,默认为当前日期 + if dateEnd is None: + end_date = datetime.now() + else: + # 解析YYYYMMDD格式的日期 + try: + end_date = datetime.strptime(str(dateEnd), '%Y%m%d') + except ValueError: + print(f'日期格式错误: {dateEnd},使用当前日期') + end_date = datetime.now() + + # 从结束日期往回推算两个月(60天) + two_months_ago = end_date - timedelta(days=60) + cutoff_date = two_months_ago.strftime('%Y-%m-%d') + end_date_str = end_date.strftime('%Y-%m-%d') + + print(f'结束日期: {end_date_str} (dateEnd: {dateEnd})') + print(f'数据更新范围: {cutoff_date} 到 {end_date_str} (近两个月)') + print(f'超过两个月前的数据将保持不变') + + # 筛选需要更新的近两个月数据 + data_df_copy = data_df.copy() + data_df_copy["dataDate"] = pd.to_datetime(data_df_copy["dataDate"]) + recent_data = data_df_copy[ + (data_df_copy["dataDate"] >= two_months_ago) & + (data_df_copy["dataDate"] <= end_date) + ] + + print(f'原始数据总数: {len(data_df)}') + print(f'需要更新的近两个月数据: {len(recent_data)}') + + if recent_data.empty: + print('❌ 没有需要更新的近两个月数据') + return + + # 将日期转回字符串格式 + recent_data["dataDate"] = recent_data["dataDate"].dt.strftime( + '%Y-%m-%d') + + # 读取现有Excel文件 + try: + df_existing = pd.read_excel(filename, header=None) + # 保留前两行:第一行中文列名,第二行ID + header_rows = df_existing.iloc[0:2].copy() + existing_columns = df_existing.columns.tolist() + + # 分离现有数据(从第三行开始) + if len(df_existing) > 2: + existing_data_rows = df_existing.iloc[2:].copy() + # 将第一列转换为日期进行比较 + existing_data_rows.iloc[:, 0] = pd.to_datetime( + existing_data_rows.iloc[:, 0], errors='coerce') + + # 分离出需要保留的历史数据(两个月之前的数据) + old_data_mask = existing_data_rows.iloc[:, 0] < two_months_ago + # 显式使用copy() + old_data = existing_data_rows[old_data_mask].copy() + + # 安全地将日期转回字符串 + if not old_data.empty: + # 先检查是否已经是datetime类型 + if 
pd.api.types.is_datetime64_any_dtype(old_data.iloc[:, 0]): + old_data.iloc[:, 0] = old_data.iloc[:, + 0].dt.strftime('%Y-%m-%d') + else: + # 如果不是datetime类型,先转换再格式化 + try: + old_data_dates = pd.to_datetime( + old_data.iloc[:, 0], errors='coerce') + if old_data_dates.notna().any(): + old_data.iloc[:, 0] = old_data_dates.dt.strftime( + '%Y-%m-%d') + else: + # 如果转换失败,保持原始值 + print('警告:历史数据日期转换失败,保持原始格式') + except Exception as e: + print(f'警告:历史数据日期处理出错: {str(e)},保持原始格式') + + print(f'文件中现有数据行数: {len(existing_data_rows)}') + print(f'保留的两个月前历史数据: {len(old_data)} 行') + else: + old_data = pd.DataFrame(columns=existing_columns) + print('文件中没有现有数据行') + + print(f'使用现有文件的表头结构: {len(existing_columns)} 列') + + except FileNotFoundError: + # 如果文件不存在,创建新的前两行结构 + chinese_names = ['日期'] + \ + [f'数据项{i}' for i in range(len(dataItemNoList))] + id_row = ['日期'] + [str(item) for item in dataItemNoList] + + max_cols = max(len(chinese_names), len(id_row)) + existing_columns = list(range(max_cols)) + + header_data = [] + header_data.append( + chinese_names + [''] * (max_cols - len(chinese_names))) + header_data.append(id_row + [''] * (max_cols - len(id_row))) + + header_rows = pd.DataFrame(header_data, columns=existing_columns) + old_data = pd.DataFrame(columns=existing_columns) + print(f'创建新的表头结构: {len(existing_columns)} 列') + + # 处理近两个月的新数据,按日期分组 + grouped = recent_data.groupby("dataDate") + + # 构建数据列表 + new_data_rows = [] + mapping_debug_info = [] # 添加调试信息 + + print(f"\n🔍 开始数据映射过程:") + print(f"📋 按日期分组数: {len(grouped)}") + + # 逐日期处理数据 + for date, group in grouped: + print(f"\n📅 处理日期: {date}, 记录数: {len(group)}") + + # 创建新行数据,初始化为空值 + row_data = [''] * len(existing_columns) + row_data[0] = date # 第一列是日期 + + # 根据第二行的ID来匹配数据 + id_row_values = header_rows.iloc[1].tolist() + + mapped_count = 0 + unmapped_items = [] + + # 遍历每个数据项,找到对应的列位置 + for dataItemNo in dataItemNoList: + dataItemNo_str = str(dataItemNo) + + # 在ID行中查找对应的列位置 - 支持字符串和数字类型匹配 + col_index = None + try: + # 首先尝试精确字符串匹配 + col_index = id_row_values.index(dataItemNo_str) + except ValueError: + # 如果字符串匹配失败,尝试数据类型转换匹配 + try: + # 尝试将dataItemNo转换为数字进行匹配 + if dataItemNo_str.isdigit(): + dataItemNo_num = int(dataItemNo_str) + col_index = id_row_values.index(dataItemNo_num) + else: + # 尝试将Excel中的数字ID转换为字符串匹配 + for i, excel_id in enumerate(id_row_values): + if str(excel_id) == dataItemNo_str: + col_index = i + break + except (ValueError, TypeError): + pass + + if col_index is not None: + # 查找对应的dataValue + matching_rows = group[group["dataItemNo"] + == dataItemNo_str] + if not matching_rows.empty: + data_value = matching_rows["dataValue"].iloc[0] + + # 安全检查索引范围 + if col_index < len(row_data): + row_data[col_index] = data_value + mapped_count += 1 + mapping_debug_info.append( + f"{date}|{dataItemNo_str}|{col_index}|{data_value}|成功") + else: + unmapped_items.append(f"{dataItemNo_str}(索引越界)") + mapping_debug_info.append( + f"{date}|{dataItemNo_str}|{col_index}|{data_value}|索引越界") + else: + mapping_debug_info.append( + f"{date}|{dataItemNo_str}|{col_index}|NULL|无数据值") + else: + # 如果在ID行中没找到对应的数据项,跳过 + unmapped_items.append(f"{dataItemNo_str}(未找到列)") + mapping_debug_info.append( + f"{date}|{dataItemNo_str}|N/A|N/A|未找到列") + continue + + print(f" 📊 该日期成功映射: {mapped_count}/{len(dataItemNoList)}") + if unmapped_items and len(unmapped_items) <= 3: + print(f" ⚠️ 未映射: {unmapped_items}") + + new_data_rows.append(row_data) + + print(f"\n📈 数据更新统计:") + print(f" 更新日期数: {len(new_data_rows)}") + print(f" 映射记录数: {len(mapping_debug_info)}") + + # 创建新数据DataFrame + df_new_data = 
pd.DataFrame(new_data_rows, columns=existing_columns) + + # 合并历史数据和新数据 + if not old_data.empty: + # 有历史数据需要保留 + all_data_rows = pd.concat( + [old_data, df_new_data], ignore_index=True) + print(f"📝 合并数据: {len(old_data)} 行历史数据 + {len(df_new_data)} 行新数据") + else: + # 没有历史数据 + all_data_rows = df_new_data + print(f"📝 新建数据: {len(df_new_data)} 行") + + # 按日期排序 + try: + date_column = all_data_rows.iloc[:, 0] + date_column_dt = pd.to_datetime(date_column, errors='coerce') + + if date_column_dt.notna().any(): + all_data_rows = all_data_rows.iloc[date_column_dt.argsort()] + all_data_rows.iloc[:, 0] = date_column_dt.dt.strftime( + '%Y-%m-%d') + else: + print('警告:日期格式转换失败,保持原始格式') + except Exception as date_error: + print(f'日期处理出现问题,保持原始格式: {str(date_error)}') + + # 重置索引 + all_data_rows = all_data_rows.reset_index(drop=True) + + # 合并前两行和所有数据行 + df_final = pd.concat([header_rows, all_data_rows], ignore_index=True) + + # 保存到Excel文件 + df_final.to_excel(filename, index=False, + header=False, engine='openpyxl') + + print(f'✅ 成功更新近两个月数据到 {filename}') + print(f'📊 文件总行数: {len(df_final)} (前两行标题 + {len(all_data_rows)} 行数据)') + if len(all_data_rows) > 0: + print( + f'📅 数据日期范围: {all_data_rows.iloc[:, 0].min()} 到 {all_data_rows.iloc[:, 0].max()}') + + except Exception as e: + print(f'❌ 更新近两个月数据时发生错误: {str(e)}') + import traceback + traceback.print_exc() + raise + + +def save_all_historical_data_xls(data_df, dataItemNoList, filename='沥青数据项.xlsx'): + """ + 保存所有历史日期的数据,不受日期限制 + 保留源文件的前两行(中文列名+ID行),然后追加数据 + 参数: + data_df (DataFrame): 包含 dataDate, dataItemNo, dataValue 的数据 + dataItemNoList (list): 数据项编号列表 + filename (str): 保存的Excel文件名 + """ + try: + print(f'开始保存所有历史数据到 {filename}') + + # 按日期分组数据 + grouped = data_df.groupby("dataDate") + print(f'总共有 {len(grouped)} 个日期的数据') + + # 读取现有Excel文件的前两行结构 + try: + df_existing = pd.read_excel( + filename, header=None) # 不指定header,保持原始结构 + # 保留前两行:第一行中文列名,第二行ID + header_rows = df_existing.iloc[0:2].copy() + existing_columns = df_existing.columns.tolist() # 使用数字列索引 + print(f'使用现有文件的表头结构: {len(existing_columns)} 列') + print(f'第一行(中文列名): {header_rows.iloc[0].tolist()}') + print(f'第二行(ID编号): {header_rows.iloc[1].tolist()}') + + except FileNotFoundError: + # 如果文件不存在,创建新的前两行结构 + # 第一行:中文列名(假设第一列是日期,其他列需要从现有数据推断) + chinese_names = ['日期'] + \ + [f'数据项{i}' for i in range(len(dataItemNoList))] + # 第二行:ID编号 + id_row = ['日期'] + [str(item) for item in dataItemNoList] + + # 创建前两行DataFrame + max_cols = max(len(chinese_names), len(id_row)) + existing_columns = list(range(max_cols)) + + header_data = [] + header_data.append( + chinese_names + [''] * (max_cols - len(chinese_names))) + header_data.append(id_row + [''] * (max_cols - len(id_row))) + + header_rows = pd.DataFrame(header_data, columns=existing_columns) + print(f'创建新的表头结构: {len(existing_columns)} 列') + + # 构建数据列表 + all_data = [] + mapping_debug_info = [] # 添加调试信息 + + print(f"\n🔍 开始数据映射过程:") + print(f"📋 按日期分组数: {len(grouped)}") + + # 逐日期处理数据 + for date, group in grouped: + print(f"\n📅 处理日期: {date}, 记录数: {len(group)}") + + # 创建新行数据,初始化为空值 + row_data = [''] * len(existing_columns) + row_data[0] = date # 第一列是日期 + + # 根据第二行的ID来匹配数据 + id_row_values = header_rows.iloc[1].tolist() + print(f"🏷️ ID行总数: {len(id_row_values)}") + + mapped_count = 0 + unmapped_items = [] + + # 遍历每个数据项,找到对应的列位置 + for dataItemNo in dataItemNoList: + dataItemNo_str = str(dataItemNo) + + # 在ID行中查找对应的列位置 - 支持字符串和数字类型匹配 + col_index = None + try: + # 首先尝试精确字符串匹配 + col_index = id_row_values.index(dataItemNo_str) + except ValueError: + # 如果字符串匹配失败,尝试数据类型转换匹配 + try: + # 
尝试将dataItemNo转换为数字进行匹配
+                    if dataItemNo_str.isdigit():
+                        dataItemNo_num = int(dataItemNo_str)
+                        col_index = id_row_values.index(dataItemNo_num)
+                        print(
+                            f"  🔄 {dataItemNo_str} -> 数字匹配成功,列{col_index}")
+                    else:
+                        # 尝试将Excel中的数字ID转换为字符串匹配
+                        for i, excel_id in enumerate(id_row_values):
+                            if str(excel_id) == dataItemNo_str:
+                                col_index = i
+                                print(
+                                    f"  🔄 {dataItemNo_str} -> 类型转换匹配成功,列{col_index}")
+                                break
+                except (ValueError, TypeError):
+                    pass
+
+            if col_index is not None:
+                # 查找对应的dataValue
+                matching_rows = group[group["dataItemNo"]
+                                      == dataItemNo_str]
+                if not matching_rows.empty:
+                    data_value = matching_rows["dataValue"].iloc[0]
+
+                    # 安全检查索引范围
+                    if col_index < len(row_data):
+                        row_data[col_index] = data_value
+                        mapped_count += 1
+                        print(
+                            f"  ✅ {dataItemNo_str} -> 列{col_index} = {data_value}")
+                        mapping_debug_info.append(
+                            f"{date}|{dataItemNo_str}|{col_index}|{data_value}|成功")
+                    else:
+                        print(
+                            f"  ❌ {dataItemNo_str} -> 列索引{col_index}超出范围{len(row_data)}")
+                        unmapped_items.append(f"{dataItemNo_str}(索引越界)")
+                        mapping_debug_info.append(
+                            f"{date}|{dataItemNo_str}|{col_index}|{data_value}|索引越界")
+                else:
+                    print(f"  ⚠️ {dataItemNo_str} -> 在列{col_index}但无数据值")
+                    mapping_debug_info.append(
+                        f"{date}|{dataItemNo_str}|{col_index}|NULL|无数据值")
+            else:
+                # 如果在ID行中没找到对应的数据项,跳过
+                print(f"  ❌ {dataItemNo_str} -> 未找到列位置(已尝试字符串和数字匹配)")
+                unmapped_items.append(f"{dataItemNo_str}(未找到列)")
+                mapping_debug_info.append(
+                    f"{date}|{dataItemNo_str}|N/A|N/A|未找到列")
+                continue
+
+            print(f"  📊 该日期成功映射: {mapped_count}/{len(dataItemNoList)}")
+            if unmapped_items:
+                print(
+                    f"  ⚠️ 未映射的数据项: {unmapped_items[:3]}{'...' if len(unmapped_items) > 3 else ''}")
+
+            all_data.append(row_data)
+
+        print(f"\n📈 数据更新统计:")
+        print(f"  更新日期数: {len(all_data)}")
+        print(f"  映射记录数: {len(mapping_debug_info)}")
+
+        # 保存调试信息到文件
+        debug_filename = f'mapping_debug_{datetime.now().strftime("%Y%m%d_%H%M%S")}.txt'
+        with open(debug_filename, 'w', encoding='utf-8') as f:
+            f.write("\n".join(mapping_debug_info))
+        print(f"  📄 映射调试信息已保存到: {debug_filename}")
+
+        # 创建新数据DataFrame
+        df_new_data = pd.DataFrame(all_data, columns=existing_columns)
+
+        # 处理日期排序 - 修复datetime转换问题
+        try:
+            # 先将第一列转换为datetime,处理可能的转换失败
+            date_column = df_new_data.iloc[:, 0]
+            date_column_dt = pd.to_datetime(date_column, errors='coerce')
+
+            # 检查是否有有效的datetime值
+            if date_column_dt.notna().any():
+                # 按日期排序
+                df_new_data = df_new_data.iloc[date_column_dt.argsort()]
+                # 将日期转换为字符串格式
+                df_new_data.iloc[:, 0] = date_column_dt.dt.strftime('%Y-%m-%d')
+            else:
+                # 如果日期转换全部失败,保持原始格式
+                print('警告:日期格式转换失败,保持原始格式')
+
+        except Exception as date_error:
+            print(f'日期处理出现问题,保持原始格式: {str(date_error)}')
+
+        # 重置索引
+        df_new_data = df_new_data.reset_index(drop=True)
+
+        # 合并前两行和数据行
+        df_final = pd.concat([header_rows, df_new_data], ignore_index=True)
+
+        # 保存到Excel文件(不包含pandas的列名)
+        df_final.to_excel(filename, index=False,
+                          header=False, engine='openpyxl')
+
+        print(f'成功保存 {len(all_data)} 行数据到 {filename}')
+        if len(all_data) > 0:
+            print(
+                f'数据日期范围: {df_new_data.iloc[:, 0].min()} 到 {df_new_data.iloc[:, 0].max()}')
+
+    except Exception as e:
+        print(f'保存历史数据时发生错误: {str(e)}')
+        import traceback
+        traceback.print_exc()
+        raise
+
+
 # def save_queryDataListItemNos_xls(data_df,dataItemNoList):
 #     from datetime import datetime, timedelta
 #     current_year_month = datetime.now().strftime('%Y-%m')
@@ -544,10 +1015,15 @@ def save_queryDataListItemNos_xls(data_df,dataItemNoList):
 def queryDataListItemNos(token=None):
     df = pd.read_excel('沥青数据项.xlsx')
     dataItemNoList = df.iloc[0].tolist()[1:]
-
+    # float 
转字符串,不要小数位 + dataItemNoList = [str(int(item)) if isinstance(item, float) and not pd.isna( + item) else str(item) for item in dataItemNoList] + + # 打印列表长度,检查是否超过50个限制 + print(f'数据项总数: {len(dataItemNoList)}') + if token is None: token = get_head_auth() - if not token: print('token获取失败') return @@ -556,22 +1032,49 @@ def queryDataListItemNos(token=None): from datetime import datetime, timedelta current_date = datetime.now() - # 获取当月1日 - first_day_of_month = current_date.replace(day=1) + # 从结束日期往回推算两个月,只获取近两个月的数据 + two_months_ago = current_date - timedelta(days=60) # 格式化为 YYYYMMDD 格式 dateEnd = current_date.strftime('%Y%m%d') - dateStart = first_day_of_month.strftime('%Y%m%d') + dateStart = two_months_ago.strftime('%Y%m%d') # 从两个月前开始 - search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd) + print(f'📅 数据获取范围: {dateStart} 到 {dateEnd} (近两个月)') + # 将数据项列表分批处理,每批最多50个 + batch_size = 50 + all_search_values = [] - data_df = pd.DataFrame(search_value) + for i in range(0, len(dataItemNoList), batch_size): + batch = dataItemNoList[i:i + batch_size] + print(f'处理第 {i//batch_size + 1} 批,共 {len(batch)} 个数据项') + search_value = get_queryDataListItemNos_value( + token, queryDataListItemNos_url, batch, dateStart, dateEnd) + + if search_value: + all_search_values.extend(search_value) + else: + print(f'第 {i//batch_size + 1} 批数据获取失败') + + if not all_search_values: + print('所有批次数据获取失败') + return + + print(f'总共获取到 {len(all_search_values)} 条数据记录') + + # 合并所有批次的数据 + data_df = pd.DataFrame(all_search_values) data_df["dataDate"] = pd.to_datetime(data_df["dataDate"]) data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d') - save_queryDataListItemNos_xls(data_df,dataItemNoList) - print('当月数据更新完成') + + # # 使用完整的数据项列表保存数据 + # save_queryDataListItemNos_xls(data_df, dataItemNoList) + # print('当月数据更新完成') + # 保存新的数据 + # save_all_historical_data_xls(data_df, dataItemNoList) + # 保存近两个月数据(新方法,从dateEnd往回推算两个月) + save_recent_two_months_data_xls(data_df, dataItemNoList, dateEnd=dateEnd) def save_xls_1(append_rows): @@ -615,8 +1118,7 @@ def save_xls_1(append_rows): new_sheet.write(row_count, col, append_rows[col]) # 保存新的xls文件 - new_workbook.save("沥青数据项.xlsx") - + new_workbook.save("沥青数据项.xlsx") def start(date=''): @@ -625,10 +1127,10 @@ def start(date=''): token = get_head_auth() if not token: return - - cur_time,cur_time2 = getNow(date) + + cur_time, cur_time2 = getNow(date) print(f"获取{cur_time}数据") - datas = get_data_value(token, one_cols,date=cur_time) + datas = get_data_value(token, one_cols, date=cur_time) print(len(datas)) print(datas) if not datas: @@ -641,15 +1143,16 @@ def start(date=''): print(data_value) dataItemNo_dataValue[data_value["dataItemNo"]] = "" else: - dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - + dataItemNo_dataValue[data_value["dataItemNo"] + ] = data_value["dataValue"] + for value in one_cols: if value in dataItemNo_dataValue: append_rows.append(dataItemNo_dataValue[value]) else: append_rows.append("") - - print('添加的行:',len(append_rows),append_rows) + + print('添加的行:', len(append_rows), append_rows) save_xls_2(append_rows) @@ -661,15 +1164,15 @@ def save_xls_2(append_rows): # try: # 读取现有数据(假设第一行为列名) df = pd.read_excel('沥青数据项.xlsx', sheet_name=0) - print('文件中的数据列数:',len(df.columns),df.columns) + print('文件中的数据列数:', len(df.columns), df.columns) # 转换append_rows为DataFrame if len(append_rows) != len(df.columns): # 去除第二个元素 ,不知道什么原因多一个空数据 append_rows.pop(1) - append_rows = pd.DataFrame([append_rows],columns=df.columns) 
+ append_rows = pd.DataFrame([append_rows], columns=df.columns) # 创建新数据行 new_date = append_rows['日期'].values[0] - + dates = df['日期'].to_list() # 判断日期是否存在 if new_date in dates: @@ -684,10 +1187,10 @@ def save_xls_2(append_rows): print(df.head()) print(df.tail()) print(f"插入 {new_date} 新数据") - + # 保存更新后的数据 df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl') - + # except FileNotFoundError: # # 如果文件不存在则创建新文件 # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl') @@ -695,7 +1198,7 @@ def save_xls_2(append_rows): # print(f"保存数据时发生错误: {str(e)}") -def main(start_date=None,token=None,token_push=None): +def main(start_date=None, token=None, token_push=None): from datetime import datetime, timedelta if start_date is None: start_date = datetime.now() @@ -711,8 +1214,9 @@ def main(start_date=None,token=None,token_push=None): # 训练模型 optimize_Model() # # 预测&上传预测结果 - upload_data_to_system(token_push,start_date) + upload_data_to_system(token_push, start_date) + if __name__ == "__main__": print("运行中ing...") - main() \ No newline at end of file + main() diff --git a/aisenzhecode/沥青/沥青定量预测2025年10月9日.zip b/aisenzhecode/沥青/沥青定量预测2025年10月9日.zip new file mode 100644 index 0000000..a15bcab Binary files /dev/null and b/aisenzhecode/沥青/沥青定量预测2025年10月9日.zip differ diff --git a/aisenzhecode/沥青/沥青数据项 - 副本.xlsx b/aisenzhecode/沥青/沥青数据项 - 副本.xlsx new file mode 100644 index 0000000..8eaa183 Binary files /dev/null and b/aisenzhecode/沥青/沥青数据项 - 副本.xlsx differ diff --git a/aisenzhecode/沥青/沥青数据项.xlsx b/aisenzhecode/沥青/沥青数据项.xlsx index 7c619f5..261e933 100644 Binary files a/aisenzhecode/沥青/沥青数据项.xlsx and b/aisenzhecode/沥青/沥青数据项.xlsx differ diff --git a/aisenzhecode/沥青/沥青数据项2025年9月26日备份.xlsx b/aisenzhecode/沥青/沥青数据项2025年9月26日备份.xlsx new file mode 100644 index 0000000..c1e67f8 Binary files /dev/null and b/aisenzhecode/沥青/沥青数据项2025年9月26日备份.xlsx differ diff --git a/config_jingbo.py b/config_jingbo.py index b276e8a..1344bb1 100644 --- a/config_jingbo.py +++ b/config_jingbo.py @@ -93,7 +93,7 @@ data = { ClassifyId = 1214 -# # 变量定义--线上环境 +# 变量定义--线上环境 # server_host = '10.200.32.39' # login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login" # upload_url = "http://10.200.32.39/jingbo-api/api/analysis/reportInfo/researchUploadReportSave" @@ -218,6 +218,27 @@ ClassifyId = 1214 # } +# 套期保值正式环境start +tqbz_login_url = "http://10.200.32.39/jbsh/api/server/login" +tqbz_login_data = { + "funcModule": "login", + "funcOperation": "login", + "data": { + "account": "api_szh", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode":"7145255749fb7d61263081fca3cb1469", + "terminal": "API" + } +} +query_is_trading_day_url = "http://10.200.32.39/jbsh/api/dd/futuresCalendar/checkTradeDay" +is_trading_day_data = { + "funcModule": "检查是否交易日", + "funcOperation": "检查是否交易日", + "data": "20251010" +} +# 套期保值正式环境end + + # # 生产环境数据库 # host = 'rm-2zehj3r1n60ttz9x5.mysql.rds.aliyuncs.com' # port = 3306 @@ -227,7 +248,7 @@ ClassifyId = 1214 # table_name = 'v_tbl_crude_oil_warning' -# # 变量定义--测试环境 +# 变量定义--测试环境 server_host = '192.168.100.53' # 内网 # server_host = '183.242.74.28' # 外网 login_pushreport_url = f"http://{server_host}:8080/jingbo-dev/api/server/login" @@ -342,9 +363,35 @@ push_waring_data_value_list_data = { get_waring_data_value_list_data = { "data": "8", "funcModule": "商品数据同步", "funcOperation": "同步"} +# 套期保值北京环境start +tqbz_login_url = "http://192.168.101.197:8080/jbsh/api/server/login" +tqbz_login_data = { + "funcModule": "login", + "funcOperation": "login", + "data": { + "account": 
"apitest", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode": "c5207389b0a254149fccc69c9c68397e", + "terminal": "API" + } +} +query_is_trading_day_url = "http://192.168.101.197:8080/jbsh/api/dd/futuresCalendar/checkTradeDay" +is_trading_day_data = { + "funcModule": "检查是否交易日", + "funcOperation": "检查是否交易日", + "data": "20251010" +} +# 套期保值北京环境end + + + + # 八大维度数据项编码 bdwd_items = { 'ciri': 'yyycbdwdcr', + 'cierri': 'yyycbdwdcer', + 'cisanri': 'yyycbdwdcsanr', + 'cisiri': 'yyycbdwdcsir', 'benzhou': 'yyycbdwdbz', 'cizhou': 'yyycbdwdcz', 'gezhou': 'yyycbdwdgz', @@ -373,15 +420,17 @@ DEFAULT_CONFIG = { 'create_user': 'admin', 'create_date': datetime.datetime.now(), 'update_user': 'admin', - 'update_date': datetime.datetime.now() + 'update_date': datetime.datetime.now(), + 'oil_code': 'CRUDE', + 'oil_name': '原油', } # 开关 -is_train = True # 是否训练 +is_train = False # 是否训练 is_debug = False # 是否调试 -is_eta = True # 是否使用eta接口 -is_market = True # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 +is_eta = False # 是否使用eta接口 +is_market = False # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 is_timefurture = True # 是否使用时间特征 is_fivemodels = False # 是否使用之前保存的最佳的5个模型 is_edbcode = False # 特征使用edbcoding列表中的 diff --git a/config_jingbo_yuedu.py b/config_jingbo_yuedu.py index 96fb729..4ef18b2 100644 --- a/config_jingbo_yuedu.py +++ b/config_jingbo_yuedu.py @@ -39,6 +39,7 @@ edbnamelist = [ # '新加坡(含硫0.05%) 柴油现货价','柴油:10ppm:国际市场:FOB中间价:新加坡(日)','Bloomberg Commodity Fair Value Singapore Mogas 92 Swap Month 1','97#汽油FOB新加坡现货价','无铅汽油:97#:国际市场:FOB中间价:新加坡(日)' ] +edbcodenamedict = {} # eta自有数据指标编码 次月,次二月,次三月,次四月 bdwdname = [ @@ -47,6 +48,13 @@ bdwdname = [ '次三月', '次四月', ] + +# 数据库预测结果表八大维度列名 +price_columns = [ + 'day_price', 'week_price', 'second_week_price', 'next_week_price', + 'next_month_price', 'next_february_price', 'next_march_price', 'next_april_price' +] + modelsindex = [ { "NHITS": "SELF0000143", @@ -180,6 +188,7 @@ ClassifyId = 1214 # query_data_list_item_nos_url = f"http://{server_host}/jingbo-api/api/warehouse/dwDataItem/queryDataListItemNos" # # 上传数据项值 # push_data_value_list_url = f"http://{server_host}/jingbo-api/api/dw/dataValue/pushDataValueList" +# push_png_report_url = f"http://{server_host}/jingbo-api/api/analysis/reportInfo/priceForecastImg" # login_data = { # "data": { @@ -253,6 +262,21 @@ ClassifyId = 1214 # } # ] # } + +# push_png_report_data = { +# "funcModule": '聚烯烃图片报告', +# "funcOperation": '上传聚烯烃PP价格预测图片报告', +# "data": { +# "groupNo": "000211", +# "updateTime": "2024-09-06 15:01:29", +# "fileBase64": '', # 文件内容base64 +# "title": '2025年8月5日日度周度预测结果', +# "billNo": '', +# "pushContent": "" +# } +# } + + # # 八大维度数据项编码 # bdwd_items = { # 'ciri': '原油大数据预测|FORECAST|PRICE|T', @@ -291,6 +315,10 @@ upload_warning_url = f"http://{server_host}/jingbo-dev/api/basicBuiness/crudeOil query_data_list_item_nos_url = f"http://{server_host}/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos" # 上传数据项值 push_data_value_list_url = f"http://{server_host}/jingbo-dev/api/dw/dataValue/pushDataValueList" +# 上传图片报告 +push_png_report_url = f"http://{server_host}/jingbo-dev/api/analysis/reportInfo/priceForecastImg" +# 报告审核 +upload_report_audit_url = f"http://{server_host}/jingbo-dev/api/analysis/reportInfo/researchImgReportSave" login_data = { "data": { @@ -364,6 +392,43 @@ push_data_value_list_data = { } ] } + + +push_png_report_data = { + "funcModule": '研究报告信息', + "funcOperation": '上传原油预测报告', + "data": { + "groupNo": "000161", + "updateTime": "2024-09-06 15:01:29", + "fileBase64": '', # 文件内容base64 + "title": 
'2025年8月5日日度周度预测结果', + "billNo": '', + "pushContent": "2025年8月5日日度周度预测结果" + } +} + + +upload_report_audit_data = { + "groupNo": '000197', # 用户组id + "funcModule": '研究报告信息', + "funcOperation": '原油价格预测报告审核预览', + "data": { + "ownerAccount": 'arui', # 报告所属用户账号 + "reportType": 'OIL_PRICE_FORECAST', # 报告类型,固定为OIL_PRICE_FORECAST + "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf', # 文件名称 + "fileBase64": '', # 文件内容base64 + "imgFileBase64": '', # 图片文件内容base64 + "categoryNo": 'yyjgycbg', # 研究报告分类编码 + "smartBusinessClassCode": 'YCJGYCBG', # 分析报告分类编码 + "reportEmployeeCode": "E40116", # 报告人 + "reportDeptCode": "D0044", # 报告部门 + "productGroupCode": "RAW_MATERIAL", # 商品分类 + "imgGroupNo": '000197', # 推送图片接口人员用户组编码 + "authGroupNo":'000197', # 权限用户组编码 + + } +} + # 八大维度数据项编码 bdwd_items = { 'ciri': 'yyycbdwdcr', @@ -397,12 +462,14 @@ DEFAULT_CONFIG = { 'create_user': 'admin', 'create_date': datetime.datetime.now(), 'update_user': 'admin', - 'update_date': datetime.datetime.now() + 'update_date': datetime.datetime.now(), + 'oil_code': 'CRUDE', + 'oil_name': '原油', } # 开关 -is_train = True # 是否训练 +is_train = False # 是否训练 is_debug = False # 是否调试 is_eta = False # 是否使用eta接口 is_market = False # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 @@ -410,7 +477,7 @@ is_timefurture = True # 是否使用时间特征 is_fivemodels = False # 是否使用之前保存的最佳的5个模型 is_edbcode = False # 特征使用edbcoding列表中的 is_edbnamelist = False # 自定义特征,对应上面的edbnamelist -is_update_eta = True # 预测结果上传到eta +is_update_eta = False # 预测结果上传到eta is_update_report = False # 是否上传报告 is_update_warning_data = False # 是否上传预警数据 is_update_predict_value = True # 是否上传预测值到市场信息平台 diff --git a/config_jingbo_zhoudu.py b/config_jingbo_zhoudu.py index fd3ecbc..59e3531 100644 --- a/config_jingbo_zhoudu.py +++ b/config_jingbo_zhoudu.py @@ -306,6 +306,9 @@ push_data_value_list_data = { # 八大维度数据项编码 bdwd_items = { 'ciri': 'yyycbdwdcr', + 'cierri': 'yyycbdwdcer', + 'cisanri': 'yyycbdwdcsanr', + 'cisiri': 'yyycbdwdcsir', 'benzhou': 'yyycbdwdbz', 'cizhou': 'yyycbdwdcz', 'gezhou': 'yyycbdwdgz', @@ -324,7 +327,6 @@ password = '123456' dbname = 'jingbo_test' table_name = 'v_tbl_crude_oil_warning' - DEFAULT_CONFIG = { 'feature_factor_frequency': 'D', 'strategy_id': 1, @@ -335,12 +337,15 @@ DEFAULT_CONFIG = { 'create_user': 'admin', 'create_date': datetime.datetime.now(), 'update_user': 'admin', - 'update_date': datetime.datetime.now() + 'update_date': datetime.datetime.now(), + 'oil_code': 'CRUDE', + 'oil_name': '原油', } + # 开关 -is_train = True # 是否训练 +is_train = False # 是否训练 is_debug = False # 是否调试 is_eta = False # 是否使用eta接口 is_market = False # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 @@ -348,7 +353,7 @@ is_timefurture = True # 是否使用时间特征 is_fivemodels = False # 是否使用之前保存的最佳的5个模型 is_edbcode = False # 特征使用edbcoding列表中的 is_edbnamelist = False # 自定义特征,对应上面的edbnamelist -is_update_eta = True # 预测结果上传到eta +is_update_eta = False # 预测结果上传到eta is_update_report = False # 是否上传报告 is_update_warning_data = False # 是否上传预警数据 is_update_predict_value = True # 是否上传预测值到市场信息平台 diff --git a/config_juxiting.py b/config_juxiting.py index 44fb290..98597fe 100644 --- a/config_juxiting.py +++ b/config_juxiting.py @@ -155,7 +155,6 @@ push_waring_data_value_list_url = f"http://{server_host}/jingbo-api/api/basicBui # 获取预警数据中取消订阅指标ID get_waring_data_value_list_url = f"http://{server_host}/jingbo-api/api/basicBuiness/crudeOilWarning/dataList" - login_data = { "data": { "account": "api_dev", @@ -254,6 +253,27 @@ push_waring_data_value_list_data = { get_waring_data_value_list_data = { "data": "9", "funcModule": "商品数据同步", 
"funcOperation": "同步"} +# 套期保值正式环境start +tqbz_login_url = "http://10.200.32.39/jbsh/api/server/login" +tqbz_login_data = { + "funcModule": "login", + "funcOperation": "login", + "data": { + "account": "api_szh", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode":"7145255749fb7d61263081fca3cb1469", + "terminal": "API" + } +} +query_is_trading_day_url = "http://10.200.32.39/jbsh/api/dd/futuresCalendar/checkTradeDay" +is_trading_day_data = { + "funcModule": "检查是否交易日", + "funcOperation": "检查是否交易日", + "data": "20251010" +} +# 套期保值正式环境end + + # 八大维度数据项编码 bdwd_items = { @@ -281,136 +301,138 @@ bdwd_items = { # 变量定义--测试环境 -# server_host = '192.168.100.53' # 内网 -# # server_host = '183.242.74.28' # 外网 -# login_pushreport_url = f"http://{server_host}:8080/jingbo-dev/api/server/login" -# # 上传报告 -# upload_url = f"http://{server_host}:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave" -# # 停更预警 -# upload_warning_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save" -# # 查询数据项编码 -# query_data_list_item_nos_url = f"http://{server_host}:8080/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos" -# # 上传数据项值 -# push_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/dw/dataValue/pushDataValueList" -# # 上传停更数据到市场信息平台 -# push_waring_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/crudeSaveOrupdate" -# # 获取预警数据中取消订阅指标ID -# get_waring_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/dataList" - -# login_data = { -# "data": { -# "account": "api_test", -# # "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=", # Shihua@123456 -# "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", # 123456 -# "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", -# "terminal": "API" -# }, -# "funcModule": "API", -# "funcOperation": "获取token" -# } - -# upload_data = { -# "funcModule": '研究报告信息', -# "funcOperation": '上传聚烯烃PP价格预测报告', -# "data": { -# "groupNo": "000127", -# "ownerAccount": 'arui', # 报告所属用户账号 -# "reportType": 'OIL_PRICE_FORECAST', # 报告类型,固定为OIL_PRICE_FORECAST -# "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf', # 文件名称 -# "fileBase64": '', # 文件内容base64 -# "categoryNo": 'yyjgycbg', # 研究报告分类编码 -# "smartBusinessClassCode": 'JXTJGYCBG', # 分析报告分类编码 -# "reportEmployeeCode": "E40116", # 报告人 -# "reportDeptCode": "D0044", # 报告部门 -# "productGroupCode": "RAW_MATERIAL" # 商品分类 -# } -# } - -# # 已弃用 -# warning_data = { -# "funcModule": '原油特征停更预警', -# "funcOperation": '原油特征停更预警', -# "data": { -# "groupNo": "000127", -# 'WARNING_TYPE_NAME': '特征数据停更预警', -# 'WARNING_CONTENT': '', -# 'WARNING_DATE': '' -# } -# } - -# query_data_list_item_nos_data = { -# "funcModule": "数据项", -# "funcOperation": "查询", -# "data": { -# "dateStart": "20200101", -# "dateEnd": "", -# # 数据项编码,代表 PP期货 价格 -# "dataItemNoList": ["MAIN_CONFT_SETTLE_PRICE"] -# } -# } +server_host = '192.168.100.53' # 内网 +# server_host = '183.242.74.28' # 外网 +login_pushreport_url = f"http://{server_host}:8080/jingbo-dev/api/server/login" +# 上传报告 +upload_url = f"http://{server_host}:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave" +# 停更预警 +upload_warning_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save" +# 查询数据项编码 +query_data_list_item_nos_url = f"http://{server_host}:8080/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos" +# 上传数据项值 +push_data_value_list_url = 
f"http://{server_host}:8080/jingbo-dev/api/dw/dataValue/pushDataValueList" +# 上传停更数据到市场信息平台 +push_waring_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/crudeSaveOrupdate" +# 获取预警数据中取消订阅指标ID +get_waring_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/dataList" -# push_data_value_list_data = { -# "funcModule": "数据表信息列表", -# "funcOperation": "新增", -# "data": [ -# {"dataItemNo": "91230600716676129", -# "dataDate": "20230113", -# "dataStatus": "add", -# "dataValue": 100.11 -# }, -# {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", -# "dataDate": "20230113", -# "dataStatus": "add", -# "dataValue": 100.55 -# }, -# {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", -# "dataDate": "20230113", -# "dataStatus": "add", -# "dataValue": 100.55 -# } -# ] -# } +login_data = { + "data": { + "account": "api_test", + # "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=", # Shihua@123456 + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", # 123456 + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +upload_data = { + "funcModule": '研究报告信息', + "funcOperation": '上传聚烯烃PP价格预测报告', + "data": { + "groupNo": "000127", + "ownerAccount": 'arui', # 报告所属用户账号 + "reportType": 'OIL_PRICE_FORECAST', # 报告类型,固定为OIL_PRICE_FORECAST + "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf', # 文件名称 + "fileBase64": '', # 文件内容base64 + "categoryNo": 'yyjgycbg', # 研究报告分类编码 + "smartBusinessClassCode": 'JXTJGYCBG', # 分析报告分类编码 + "reportEmployeeCode": "E40116", # 报告人 + "reportDeptCode": "D0044", # 报告部门 + "productGroupCode": "RAW_MATERIAL" # 商品分类 + } +} + +# 已弃用 +warning_data = { + "funcModule": '原油特征停更预警', + "funcOperation": '原油特征停更预警', + "data": { + "groupNo": "000127", + 'WARNING_TYPE_NAME': '特征数据停更预警', + 'WARNING_CONTENT': '', + 'WARNING_DATE': '' + } +} + +query_data_list_item_nos_data = { + "funcModule": "数据项", + "funcOperation": "查询", + "data": { + "dateStart": "20200101", + "dateEnd": "", + # 数据项编码,代表 PP期货 价格 + "dataItemNoList": ["MAIN_CONFT_SETTLE_PRICE"] + } +} -# push_waring_data_value_list_data = { -# "data": { -# "crudeOilWarningDtoList": [ -# { -# "lastUpdateDate": "20240501", -# "updateSuspensionCycle": 1, -# "dataSource": "9", -# "frequency": "1", -# "indicatorName": "美元指数", -# "indicatorId": "myzs001", -# "warningDate": "2024-05-13" -# } -# ], -# "dataSource": "9" -# }, -# "funcModule": "商品数据同步", -# "funcOperation": "同步" -# } +push_data_value_list_data = { + "funcModule": "数据表信息列表", + "funcOperation": "新增", + "data": [ + {"dataItemNo": "91230600716676129", + "dataDate": "20230113", + "dataStatus": "add", + "dataValue": 100.11 + }, + {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", + "dataDate": "20230113", + "dataStatus": "add", + "dataValue": 100.55 + }, + {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", + "dataDate": "20230113", + "dataStatus": "add", + "dataValue": 100.55 + } + ] +} -# get_waring_data_value_list_data = { -# "data": "9", "funcModule": "商品数据同步", "funcOperation": "同步"} +push_waring_data_value_list_data = { + "data": { + "crudeOilWarningDtoList": [ + { + "lastUpdateDate": "20240501", + "updateSuspensionCycle": 1, + "dataSource": "9", + "frequency": "1", + "indicatorName": "美元指数", + "indicatorId": "myzs001", + "warningDate": "2024-05-13" + } + ], + "dataSource": "9" + }, + "funcModule": "商品数据同步", + "funcOperation": "同步" +} -# # 八大维度数据项编码 -# bdwd_items = { -# 
'ciri': 'jxtppbdwdcr', -# 'cierri': 'jxtppbdwdcer', -# 'cisanri': 'jxtppbdwdcsanr', -# 'cisiri': 'jxtppbdwdcsir', -# 'benzhou': 'jxtppbdwdbz', -# 'cizhou': 'jxtppbdwdcz', -# 'gezhou': 'jxtppbdwdgz', -# 'ciyue': 'jxtppbdwdcy', -# 'cieryue': 'jxtppbdwdcey', -# 'cisanyue': 'jxtppbdwdcsany', -# 'cisiyue': 'jxtppbdwdcsiy', -# } +get_waring_data_value_list_data = { + "data": "9", "funcModule": "商品数据同步", "funcOperation": "同步"} + + + +# 八大维度数据项编码 +bdwd_items = { + 'ciri': 'jxtppbdwdcr', + 'cierri': 'jxtppbdwdcer', + 'cisanri': 'jxtppbdwdcsanr', + 'cisiri': 'jxtppbdwdcsir', + 'benzhou': 'jxtppbdwdbz', + 'cizhou': 'jxtppbdwdcz', + 'gezhou': 'jxtppbdwdgz', + 'ciyue': 'jxtppbdwdcy', + 'cieryue': 'jxtppbdwdcey', + 'cisanyue': 'jxtppbdwdcsany', + 'cisiyue': 'jxtppbdwdcsiy', +} # 北京环境数据库 host = '192.168.101.27' port = 3306 diff --git a/config_juxiting_yuedu.py b/config_juxiting_yuedu.py index e83ad66..bdd1b29 100644 --- a/config_juxiting_yuedu.py +++ b/config_juxiting_yuedu.py @@ -218,148 +218,19 @@ ClassifyId = 1161 # 变量定义--线上环境 -server_host = '10.200.32.39' -login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login" -upload_url = "http://10.200.32.39/jingbo-api/api/analysis/reportInfo/researchUploadReportSave" -upload_warning_url = "http://10.200.32.39/jingbo-api/api/basicBuiness/crudeOilWarning/save" -query_data_list_item_nos_url = f"http://{server_host}/jingbo-api/api/warehouse/dwDataItem/queryDataListItemNos" -# 上传数据项值 -push_data_value_list_url = f"http://{server_host}/jingbo-api/api/dw/dataValue/pushDataValueList" -push_png_report_url = f"http://{server_host}/jingbo-api/api/analysis/reportInfo/priceForecastImg" - -login_data = { - "data": { - "account": "api_dev", - "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", - "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", - "terminal": "API" - }, - "funcModule": "API", - "funcOperation": "获取token" -} - - -upload_data = { - "funcModule": '研究报告信息', - "funcOperation": '上传原油价格预测报告', - "data": { - "groupNo": '', # 用户组id - "ownerAccount": '27663', # 报告所属用户账号 27663 - 刘小朋 - "reportType": 'OIL_PRICE_FORECAST', # 报告类型,固定为OIL_PRICE_FORECAST - "fileName": '', # 文件名称 - "fileBase64": '', # 文件内容base64 - "categoryNo": 'yyjgycbg', # 研究报告分类编码 - "smartBusinessClassCode": 'YCJGYCBG', # 分析报告分类编码 - "reportEmployeeCode": "E40482", # 报告人 E40482 - 管理员 0000027663 - 刘小朋 - "reportDeptCode": "002000621000", # 报告部门 - 002000621000 SH期货研究部 - "productGroupCode": "RAW_MATERIAL" # 商品分类 - } -} - -warning_data = { - "groupNo": '', # 用户组id - "funcModule": '原油特征停更预警', - "funcOperation": '原油特征停更预警', - "data": { - 'WARNING_TYPE_NAME': '特征数据停更预警', - 'WARNING_CONTENT': '', - 'WARNING_DATE': '' - } -} - -query_data_list_item_nos_data = { - "funcModule": "数据项", - "funcOperation": "查询", - "data": { - "dateStart": "20150101", - "dateEnd": "20301231", - "dataItemNoList": ["MAIN_CONFT_SETTLE_PRICE"] # 数据项编码,代表 brent最低价和最高价 - } -} - - -push_data_value_list_data = { - "funcModule": "数据表信息列表", - "funcOperation": "新增", - "data": [ - {"dataItemNo": "91230600716676129", - "dataDate": "20230113", - "dataStatus": "add", - "dataValue": 100.11 - }, - {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", - "dataDate": "20230113", - "dataStatus": "add", - "dataValue": 100.55 - }, - {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", - "dataDate": "20230113", - "dataStatus": "add", - "dataValue": 100.55 - } - ] -} - -push_png_report_data = { - "funcModule": '聚烯烃图片报告', - "funcOperation": '上传聚烯烃PP价格预测图片报告', - "data": { - "groupNo": "000211", - "updateTime": "2024-09-06 15:01:29", - 
"fileBase64": '', # 文件内容base64 - "title": '2025年8月5日日度周度预测结果', - "billNo": '', - } -} - - -# 八大维度数据项编码 -bdwd_items = { - 'ciri': '251889263|FORECAST|PRICE|T01', - 'cierri': '251889263|FORECAST|PRICE|T02', - 'cisanri': '251889263|FORECAST|PRICE|T03', - 'cisiri': '251889263|FORECAST|PRICE|T04', - 'benzhou': '251889263|FORECAST|PRICE|T05', - 'cizhou': '251889263|FORECAST|PRICE|W_01', - 'gezhou': '251889263|FORECAST|PRICE|W_02', - 'ciyue': '251889263|FORECAST|PRICE|M_01', - 'cieryue': '251889263|FORECAST|PRICE|M_02', - 'cisanyue': '251889263|FORECAST|PRICE|M_03', - 'cisiyue': '251889263|FORECAST|PRICE|M_04', -} - - -# 报告中八大维度数据项重命名 -columnsrename = {'251889263|FORECAST|PRICE|T05': '本周', '251889263|FORECAST|PRICE|M_02': '次二月', '251889263|FORECAST|PRICE|T01': '次日', '251889263|FORECAST|PRICE|M_04': '次四月', - '251889263|FORECAST|PRICE|M_03': '次三月', '251889263|FORECAST|PRICE|M_01': '次月', '251889263|FORECAST|PRICE|W_01': '次周', '251889263|FORECAST|PRICE|W_02': '隔周', } - - -# 生产环境数据库 -host = 'rm-2zehj3r1n60ttz9x5.mysql.rds.aliyuncs.com' -port = 3306 -dbusername = 'jingbo' -password = 'shihua@123' -dbname = 'jingbo' -table_name = 'v_tbl_crude_oil_warning' - - -# 变量定义--测试环境 -# server_host = '192.168.100.53:8080' # 内网 -# # server_host = '183.242.74.28' # 外网 -# login_pushreport_url = f"http://{server_host}/jingbo-dev/api/server/login" -# upload_url = f"http://{server_host}/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave" -# upload_warning_url = f"http://{server_host}/jingbo-dev/api/basicBuiness/crudeOilWarning/save" -# query_data_list_item_nos_url = f"http://{server_host}/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos" +# server_host = '10.200.32.39' +# login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login" +# upload_url = "http://10.200.32.39/jingbo-api/api/analysis/reportInfo/researchUploadReportSave" +# upload_warning_url = "http://10.200.32.39/jingbo-api/api/basicBuiness/crudeOilWarning/save" +# query_data_list_item_nos_url = f"http://{server_host}/jingbo-api/api/warehouse/dwDataItem/queryDataListItemNos" # # 上传数据项值 -# push_data_value_list_url = f"http://{server_host}/jingbo-dev/api/dw/dataValue/pushDataValueList" -# # 上传图片报告 -# push_png_report_url = f"http://{server_host}/jingbo-dev/api/analysis/reportInfo/priceForecastImg" +# push_data_value_list_url = f"http://{server_host}/jingbo-api/api/dw/dataValue/pushDataValueList" +# push_png_report_url = f"http://{server_host}/jingbo-api/api/analysis/reportInfo/priceForecastImg" # login_data = { # "data": { -# "account": "api_test", -# # "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=", # Shihua@123456 -# "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", # 123456 +# "account": "api_dev", +# "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", # "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", # "terminal": "API" # }, @@ -367,24 +238,26 @@ table_name = 'v_tbl_crude_oil_warning' # "funcOperation": "获取token" # } + # upload_data = { # "funcModule": '研究报告信息', -# "funcOperation": '上传聚烯烃PP价格预测报告', +# "funcOperation": '上传原油价格预测报告', # "data": { -# "ownerAccount": 'arui', # 报告所属用户账号 +# "groupNo": '', # 用户组id +# "ownerAccount": '27663', # 报告所属用户账号 27663 - 刘小朋 # "reportType": 'OIL_PRICE_FORECAST', # 报告类型,固定为OIL_PRICE_FORECAST -# "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf', # 文件名称 +# "fileName": '', # 文件名称 # "fileBase64": '', # 文件内容base64 # "categoryNo": 'yyjgycbg', # 研究报告分类编码 -# "smartBusinessClassCode": 'JXTJGYCBG', # 分析报告分类编码 -# "reportEmployeeCode": 
"E40116", # 报告人 -# "reportDeptCode": "D0044", # 报告部门 +# "smartBusinessClassCode": 'YCJGYCBG', # 分析报告分类编码 +# "reportEmployeeCode": "E40482", # 报告人 E40482 - 管理员 0000027663 - 刘小朋 +# "reportDeptCode": "002000621000", # 报告部门 - 002000621000 SH期货研究部 # "productGroupCode": "RAW_MATERIAL" # 商品分类 # } # } - # warning_data = { +# "groupNo": '', # 用户组id # "funcModule": '原油特征停更预警', # "funcOperation": '原油特征停更预警', # "data": { @@ -398,13 +271,13 @@ table_name = 'v_tbl_crude_oil_warning' # "funcModule": "数据项", # "funcOperation": "查询", # "data": { -# "dateStart": "20200101", -# "dateEnd": "20241231", -# # 数据项编码,代表 PP期货 价格 -# "dataItemNoList": ["MAIN_CONFT_SETTLE_PRICE"] +# "dateStart": "20150101", +# "dateEnd": "20301231", +# "dataItemNoList": ["MAIN_CONFT_SETTLE_PRICE"] # 数据项编码,代表 brent最低价和最高价 # } # } + # push_data_value_list_data = { # "funcModule": "数据表信息列表", # "funcOperation": "新增", @@ -427,35 +300,183 @@ table_name = 'v_tbl_crude_oil_warning' # ] # } - # push_png_report_data = { -# "funcModule": '研究报告信息', -# "funcOperation": '上传聚烯烃PP价格预测报告', +# "funcModule": '聚烯烃图片报告', +# "funcOperation": '上传聚烯烃PP价格预测图片报告', # "data": { -# "groupNo": "000161", +# "groupNo": "000211", # "updateTime": "2024-09-06 15:01:29", # "fileBase64": '', # 文件内容base64 # "title": '2025年8月5日日度周度预测结果', # "billNo": '', +# "pushContent": "" # } # } +# 套期保值正式环境start +tqbz_login_url = "http://10.200.32.39/jbsh/api/server/login" +tqbz_login_data = { + "funcModule": "login", + "funcOperation": "login", + "data": { + "account": "api_szh", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode":"7145255749fb7d61263081fca3cb1469", + "terminal": "API" + } +} +query_is_trading_day_url = "http://10.200.32.39/jbsh/api/dd/futuresCalendar/checkTradeDay" +is_trading_day_data = { + "funcModule": "检查是否交易日", + "funcOperation": "检查是否交易日", + "data": "20251010" +} +# 套期保值正式环境end # # 八大维度数据项编码 # bdwd_items = { -# 'ciri': 'jxtppbdwdcr', -# 'benzhou': 'jxtppbdwdbz', -# 'cizhou': 'jxtppbdwdcz', -# 'gezhou': 'jxtppbdwdgz', -# 'ciyue': 'jxtppbdwdcy', -# 'cieryue': 'jxtppbdwdcey', -# 'cisanyue': 'jxtppbdwdcsany', -# 'cisiyue': 'jxtppbdwdcsiy', +# 'ciri': '251889263|FORECAST|PRICE|T01', +# 'cierri': '251889263|FORECAST|PRICE|T02', +# 'cisanri': '251889263|FORECAST|PRICE|T03', +# 'cisiri': '251889263|FORECAST|PRICE|T04', +# 'benzhou': '251889263|FORECAST|PRICE|T05', +# 'cizhou': '251889263|FORECAST|PRICE|W_01', +# 'gezhou': '251889263|FORECAST|PRICE|W_02', +# 'ciyue': '251889263|FORECAST|PRICE|M_01', +# 'cieryue': '251889263|FORECAST|PRICE|M_02', +# 'cisanyue': '251889263|FORECAST|PRICE|M_03', +# 'cisiyue': '251889263|FORECAST|PRICE|M_04', # } + # # 报告中八大维度数据项重命名 -# columnsrename = {'jxtppbdwdbz': '本周', 'jxtppbdwdcey': '次二月', 'jxtppbdwdcr': '次日', 'jxtppbdwdcsiy': '次四月', -# 'jxtppbdwdcsany': '次三月', 'jxtppbdwdcy': '次月', 'jxtppbdwdcz': '次周', 'jxtppbdwdgz': '隔周', } +# columnsrename = {'251889263|FORECAST|PRICE|T05': '本周', '251889263|FORECAST|PRICE|M_02': '次二月', '251889263|FORECAST|PRICE|T01': '次日', '251889263|FORECAST|PRICE|M_04': '次四月', +# '251889263|FORECAST|PRICE|M_03': '次三月', '251889263|FORECAST|PRICE|M_01': '次月', '251889263|FORECAST|PRICE|W_01': '次周', '251889263|FORECAST|PRICE|W_02': '隔周', } + + +# # 生产环境数据库 +# host = 'rm-2zehj3r1n60ttz9x5.mysql.rds.aliyuncs.com' +# port = 3306 +# dbusername = 'jingbo' +# password = 'shihua@123' +# dbname = 'jingbo' +# table_name = 'v_tbl_crude_oil_warning' + + +# 变量定义--测试环境 +server_host = '192.168.100.53:8080' # 内网 +# server_host = '183.242.74.28' # 外网 +login_pushreport_url = 
f"http://{server_host}/jingbo-dev/api/server/login" +upload_url = f"http://{server_host}/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave" +upload_warning_url = f"http://{server_host}/jingbo-dev/api/basicBuiness/crudeOilWarning/save" +query_data_list_item_nos_url = f"http://{server_host}/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos" +# 上传数据项值 +push_data_value_list_url = f"http://{server_host}/jingbo-dev/api/dw/dataValue/pushDataValueList" +# 上传图片报告 +push_png_report_url = f"http://{server_host}/jingbo-dev/api/analysis/reportInfo/priceForecastImg" + +login_data = { + "data": { + "account": "api_test", + # "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=", # Shihua@123456 + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", # 123456 + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +upload_data = { + "funcModule": '研究报告信息', + "funcOperation": '上传聚烯烃PP价格预测报告', + "data": { + "ownerAccount": 'arui', # 报告所属用户账号 + "reportType": 'OIL_PRICE_FORECAST', # 报告类型,固定为OIL_PRICE_FORECAST + "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf', # 文件名称 + "fileBase64": '', # 文件内容base64 + "categoryNo": 'yyjgycbg', # 研究报告分类编码 + "smartBusinessClassCode": 'JXTJGYCBG', # 分析报告分类编码 + "reportEmployeeCode": "E40116", # 报告人 + "reportDeptCode": "D0044", # 报告部门 + "productGroupCode": "RAW_MATERIAL" # 商品分类 + } +} + + +warning_data = { + "funcModule": '原油特征停更预警', + "funcOperation": '原油特征停更预警', + "data": { + 'WARNING_TYPE_NAME': '特征数据停更预警', + 'WARNING_CONTENT': '', + 'WARNING_DATE': '' + } +} + +query_data_list_item_nos_data = { + "funcModule": "数据项", + "funcOperation": "查询", + "data": { + "dateStart": "20200101", + "dateEnd": "20241231", + # 数据项编码,代表 PP期货 价格 + "dataItemNoList": ["MAIN_CONFT_SETTLE_PRICE"] + } +} + +push_data_value_list_data = { + "funcModule": "数据表信息列表", + "funcOperation": "新增", + "data": [ + {"dataItemNo": "91230600716676129", + "dataDate": "20230113", + "dataStatus": "add", + "dataValue": 100.11 + }, + {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", + "dataDate": "20230113", + "dataStatus": "add", + "dataValue": 100.55 + }, + {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", + "dataDate": "20230113", + "dataStatus": "add", + "dataValue": 100.55 + } + ] +} + + +push_png_report_data = { + "funcModule": '研究报告信息', + "funcOperation": '上传聚烯烃PP价格预测报告', + "data": { + "groupNo": "000161", + "updateTime": "2024-09-06 15:01:29", + "fileBase64": '', # 文件内容base64 + "title": '2025年8月5日日度周度预测结果', + "billNo": '', + "pushContent": "2025年8月5日日度周度预测结果" + } +} + + +# 八大维度数据项编码 +bdwd_items = { + 'ciri': 'jxtppbdwdcr', + 'benzhou': 'jxtppbdwdbz', + 'cizhou': 'jxtppbdwdcz', + 'gezhou': 'jxtppbdwdgz', + 'ciyue': 'jxtppbdwdcy', + 'cieryue': 'jxtppbdwdcey', + 'cisanyue': 'jxtppbdwdcsany', + 'cisiyue': 'jxtppbdwdcsiy', +} + +# 报告中八大维度数据项重命名 +columnsrename = {'jxtppbdwdbz': '本周', 'jxtppbdwdcey': '次二月', 'jxtppbdwdcr': '次日', 'jxtppbdwdcsiy': '次四月', + 'jxtppbdwdcsany': '次三月', 'jxtppbdwdcy': '次月', 'jxtppbdwdcz': '次周', 'jxtppbdwdgz': '隔周', } # 北京环境数据库 host = '192.168.101.27' diff --git a/config_juxiting_zhoudu.py b/config_juxiting_zhoudu.py index ed4e5e6..643d25a 100644 --- a/config_juxiting_zhoudu.py +++ b/config_juxiting_zhoudu.py @@ -169,23 +169,185 @@ ClassifyId = 1161 # 变量定义--线上环境 -server_host = '10.200.32.39' -login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login" -upload_url = 
"http://10.200.32.39/jingbo-api/api/analysis/reportInfo/researchUploadReportSave" -upload_warning_url = "http://10.200.32.39/jingbo-api/api/basicBuiness/crudeOilWarning/save" -query_data_list_item_nos_url = f"http://{server_host}/jingbo-api/api/warehouse/dwDataItem/queryDataListItemNos" -# 上传数据项值 -push_data_value_list_url = f"http://{server_host}/jingbo-api/api/dw/dataValue/pushDataValueList" -# 上传停更数据到市场信息平台 -push_waring_data_value_list_url = f"http://{server_host}/jingbo-api/api/basicBuiness/crudeOilWarning/crudeSaveOrupdate" -# 获取预警数据中取消订阅指标ID -get_waring_data_value_list_url = f"http://{server_host}/jingbo-api/api/basicBuiness/crudeOilWarning/dataList" +# server_host = '10.200.32.39' +# login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login" +# upload_url = "http://10.200.32.39/jingbo-api/api/analysis/reportInfo/researchUploadReportSave" +# upload_warning_url = "http://10.200.32.39/jingbo-api/api/basicBuiness/crudeOilWarning/save" +# query_data_list_item_nos_url = f"http://{server_host}/jingbo-api/api/warehouse/dwDataItem/queryDataListItemNos" +# # 上传数据项值 +# push_data_value_list_url = f"http://{server_host}/jingbo-api/api/dw/dataValue/pushDataValueList" +# # 上传停更数据到市场信息平台 +# push_waring_data_value_list_url = f"http://{server_host}/jingbo-api/api/basicBuiness/crudeOilWarning/crudeSaveOrupdate" +# # 获取预警数据中取消订阅指标ID +# get_waring_data_value_list_url = f"http://{server_host}/jingbo-api/api/basicBuiness/crudeOilWarning/dataList" +# login_data = { +# "data": { +# "account": "api_dev", +# "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", +# "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", +# "terminal": "API" +# }, +# "funcModule": "API", +# "funcOperation": "获取token" +# } + + +# upload_data = { +# "funcModule": '研究报告信息', +# "funcOperation": '上传聚烯烃PP价格预测报告', +# "data": { +# "groupNo": '000211', # 用户组编号 +# "ownerAccount": '36541', # 报告所属用户账号  36541 - 贾青雪 +# "reportType": 'OIL_PRICE_FORECAST', # 报告类型,固定为OIL_PRICE_FORECAST +# "fileName": '', # 文件名称 +# "fileBase64": '', # 文件内容base64 +# "categoryNo": 'jxtjgycbg', # 研究报告分类编码 +# "smartBusinessClassCode": 'JXTJGYCBG', # 分析报告分类编码 +# "reportEmployeeCode": "E40482", # 报告人  E40482  - 管理员  0000027663 - 刘小朋   +# "reportDeptCode": "JXTJGYCBG", # 报告部门 - 002000621000  SH期货研究部   +# "productGroupCode": "RAW_MATERIAL" # 商品分类 +# } +# } + +# warning_data = { +# "funcModule": '原油特征停更预警', +# "funcOperation": '原油特征停更预警', +# "data": { +# "groupNo": "000211", +# 'WARNING_TYPE_NAME': '特征数据停更预警', +# 'WARNING_CONTENT': '', +# 'WARNING_DATE': '' +# } +# } + +# query_data_list_item_nos_data = { +# "funcModule": "数据项", +# "funcOperation": "查询", +# "data": { +# "dateStart": "20200101", +# "dateEnd": "", +# # 数据项编码,代表 PP期货 价格 +# "dataItemNoList": ["MAIN_CONFT_SETTLE_PRICE"] +# } +# } + + +# push_data_value_list_data = { +# "funcModule": "数据表信息列表", +# "funcOperation": "新增", +# "data": [ +# {"dataItemNo": "91230600716676129", +# "dataDate": "20230113", +# "dataStatus": "add", +# "dataValue": 100.11 +# }, +# {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", +# "dataDate": "20230113", +# "dataStatus": "add", +# "dataValue": 100.55 +# }, +# {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", +# "dataDate": "20230113", +# "dataStatus": "add", +# "dataValue": 100.55 +# } +# ] +# } + + +# push_waring_data_value_list_data = { +# "data": { +# "crudeOilWarningDtoList": [ +# { +# "lastUpdateDate": "20240501", +# "updateSuspensionCycle": 1, +# "dataSource": "9", +# "frequency": "1", +# "indicatorName": "美元指数", +# "indicatorId": "myzs001", +# 
"warningDate": "2024-05-13" +# } +# ], +# "dataSource": "9" +# }, +# "funcModule": "商品数据同步", +# "funcOperation": "同步" +# } + + +# get_waring_data_value_list_data = { +# "data": "9", "funcModule": "商品数据同步", "funcOperation": "同步"} + +# 套期保值正式环境start +tqbz_login_url = "http://10.200.32.39/jbsh/api/server/login" +tqbz_login_data = { + "funcModule": "login", + "funcOperation": "login", + "data": { + "account": "api_szh", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode":"7145255749fb7d61263081fca3cb1469", + "terminal": "API" + } +} +query_is_trading_day_url = "http://10.200.32.39/jbsh/api/dd/futuresCalendar/checkTradeDay" +is_trading_day_data = { + "funcModule": "检查是否交易日", + "funcOperation": "检查是否交易日", + "data": "20251010" +} +# 套期保值正式环境end + +# # 八大维度数据项编码 +# bdwd_items = { +# 'ciri': '251889263|FORECAST|PRICE|T01', +# 'cierri': '251889263|FORECAST|PRICE|T02', +# 'cisanri': '251889263|FORECAST|PRICE|T03', +# 'cisiri': '251889263|FORECAST|PRICE|T04', +# 'benzhou': '251889263|FORECAST|PRICE|T05', +# 'cizhou': '251889263|FORECAST|PRICE|W_01', +# 'gezhou': '251889263|FORECAST|PRICE|W_02', +# 'ciyue': '251889263|FORECAST|PRICE|M_01', +# 'cieryue': '251889263|FORECAST|PRICE|M_02', +# 'cisanyue': '251889263|FORECAST|PRICE|M_03', +# 'cisiyue': '251889263|FORECAST|PRICE|M_04', +# } + + + + +# # 生产环境数据库 +# host = 'rm-2zehj3r1n60ttz9x5.mysql.rds.aliyuncs.com' +# port = 3306 +# dbusername = 'jingbo' +# password = 'shihua@123' +# dbname = 'jingbo' +# table_name = 'v_tbl_crude_oil_warning' + +# 变量定义--测试环境 +server_host = '192.168.100.53' # 内网 +# server_host = '183.242.74.28' # 外网 +login_pushreport_url = f"http://{server_host}:8080/jingbo-dev/api/server/login" +# 上传报告 +upload_url = f"http://{server_host}:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave" +# 停更预警 +upload_warning_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save" +# 查询数据项编码 +query_data_list_item_nos_url = f"http://{server_host}:8080/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos" +# 上传数据项值 +push_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/dw/dataValue/pushDataValueList" +# 上传停更数据到市场信息平台 +push_waring_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/crudeSaveOrupdate" +# 获取预警数据中取消订阅指标ID +get_waring_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/dataList" + login_data = { "data": { - "account": "api_dev", - "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "account": "api_test", + # "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=", # Shihua@123456 + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", # 123456 "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", "terminal": "API" }, @@ -193,29 +355,29 @@ login_data = { "funcOperation": "获取token" } - upload_data = { "funcModule": '研究报告信息', "funcOperation": '上传聚烯烃PP价格预测报告', "data": { - "groupNo": '000211', # 用户组编号 - "ownerAccount": '36541', # 报告所属用户账号  36541 - 贾青雪 + "groupNo": "000127", + "ownerAccount": 'arui', # 报告所属用户账号 "reportType": 'OIL_PRICE_FORECAST', # 报告类型,固定为OIL_PRICE_FORECAST - "fileName": '', # 文件名称 + "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf', # 文件名称 "fileBase64": '', # 文件内容base64 - "categoryNo": 'jxtjgycbg', # 研究报告分类编码 + "categoryNo": 'yyjgycbg', # 研究报告分类编码 "smartBusinessClassCode": 'JXTJGYCBG', # 分析报告分类编码 - "reportEmployeeCode": "E40482", # 报告人  E40482  - 管理员  0000027663 - 刘小朋   - "reportDeptCode": "JXTJGYCBG", # 
报告部门 - 002000621000  SH期货研究部   - "productGroupCode": "RAW_MATERIAL" # 商品分类 + "reportEmployeeCode": "E40116", # 报告人 + "reportDeptCode": "D0044", # 报告部门 + "productGroupCode": "RAW_MATERIAL" # 商品分类 } } +# 已弃用 warning_data = { "funcModule": '原油特征停更预警', "funcOperation": '原油特征停更预警', "data": { - "groupNo": "000211", + "groupNo": "000127", 'WARNING_TYPE_NAME': '特征数据停更预警', 'WARNING_CONTENT': '', 'WARNING_DATE': '' @@ -233,7 +395,6 @@ query_data_list_item_nos_data = { } } - push_data_value_list_data = { "funcModule": "数据表信息列表", "funcOperation": "新增", @@ -281,170 +442,26 @@ get_waring_data_value_list_data = { "data": "9", "funcModule": "商品数据同步", "funcOperation": "同步"} - # 八大维度数据项编码 bdwd_items = { - 'ciri': '251889263|FORECAST|PRICE|T01', - 'cierri': '251889263|FORECAST|PRICE|T02', - 'cisanri': '251889263|FORECAST|PRICE|T03', - 'cisiri': '251889263|FORECAST|PRICE|T04', - 'benzhou': '251889263|FORECAST|PRICE|T05', - 'cizhou': '251889263|FORECAST|PRICE|W_01', - 'gezhou': '251889263|FORECAST|PRICE|W_02', - 'ciyue': '251889263|FORECAST|PRICE|M_01', - 'cieryue': '251889263|FORECAST|PRICE|M_02', - 'cisanyue': '251889263|FORECAST|PRICE|M_03', - 'cisiyue': '251889263|FORECAST|PRICE|M_04', + 'ciri': 'jxtppbdwdcr', + 'benzhou': 'jxtppbdwdbz', + 'cizhou': 'jxtppbdwdcz', + 'gezhou': 'jxtppbdwdgz', + 'ciyue': 'jxtppbdwdcy', + 'cieryue': 'jxtppbdwdcey', + 'cisanyue': 'jxtppbdwdcsany', + 'cisiyue': 'jxtppbdwdcsiy', } - - -# # 生产环境数据库 -host = 'rm-2zehj3r1n60ttz9x5.mysql.rds.aliyuncs.com' -port = 3306 -dbusername = 'jingbo' -password = 'shihua@123' -dbname = 'jingbo' -table_name = 'v_tbl_crude_oil_warning' - - -# 变量定义--测试环境 -# server_host = '192.168.100.53' # 内网 -# # server_host = '183.242.74.28' # 外网 -# login_pushreport_url = f"http://{server_host}:8080/jingbo-dev/api/server/login" -# # 上传报告 -# upload_url = f"http://{server_host}:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave" -# # 停更预警 -# upload_warning_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save" -# # 查询数据项编码 -# query_data_list_item_nos_url = f"http://{server_host}:8080/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos" -# # 上传数据项值 -# push_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/dw/dataValue/pushDataValueList" -# # 上传停更数据到市场信息平台 -# push_waring_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/crudeSaveOrupdate" -# # 获取预警数据中取消订阅指标ID -# get_waring_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/dataList" - -# login_data = { -# "data": { -# "account": "api_test", -# # "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=", # Shihua@123456 -# "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", # 123456 -# "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", -# "terminal": "API" -# }, -# "funcModule": "API", -# "funcOperation": "获取token" -# } - -# upload_data = { -# "funcModule": '研究报告信息', -# "funcOperation": '上传聚烯烃PP价格预测报告', -# "data": { -# "groupNo": "000127", -# "ownerAccount": 'arui', # 报告所属用户账号 -# "reportType": 'OIL_PRICE_FORECAST', # 报告类型,固定为OIL_PRICE_FORECAST -# "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf', # 文件名称 -# "fileBase64": '', # 文件内容base64 -# "categoryNo": 'yyjgycbg', # 研究报告分类编码 -# "smartBusinessClassCode": 'JXTJGYCBG', # 分析报告分类编码 -# "reportEmployeeCode": "E40116", # 报告人 -# "reportDeptCode": "D0044", # 报告部门 -# "productGroupCode": "RAW_MATERIAL" # 商品分类 -# } -# } - -# # 已弃用 -# warning_data = { -# "funcModule": '原油特征停更预警', -# 
"funcOperation": '原油特征停更预警', -# "data": { -# "groupNo": "000127", -# 'WARNING_TYPE_NAME': '特征数据停更预警', -# 'WARNING_CONTENT': '', -# 'WARNING_DATE': '' -# } -# } - -# query_data_list_item_nos_data = { -# "funcModule": "数据项", -# "funcOperation": "查询", -# "data": { -# "dateStart": "20200101", -# "dateEnd": "", -# # 数据项编码,代表 PP期货 价格 -# "dataItemNoList": ["MAIN_CONFT_SETTLE_PRICE"] -# } -# } - -# push_data_value_list_data = { -# "funcModule": "数据表信息列表", -# "funcOperation": "新增", -# "data": [ -# {"dataItemNo": "91230600716676129", -# "dataDate": "20230113", -# "dataStatus": "add", -# "dataValue": 100.11 -# }, -# {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", -# "dataDate": "20230113", -# "dataStatus": "add", -# "dataValue": 100.55 -# }, -# {"dataItemNo": "91230600716676129P|ETHYL_BEN|CAPACITY", -# "dataDate": "20230113", -# "dataStatus": "add", -# "dataValue": 100.55 -# } -# ] -# } - - -# push_waring_data_value_list_data = { -# "data": { -# "crudeOilWarningDtoList": [ -# { -# "lastUpdateDate": "20240501", -# "updateSuspensionCycle": 1, -# "dataSource": "9", -# "frequency": "1", -# "indicatorName": "美元指数", -# "indicatorId": "myzs001", -# "warningDate": "2024-05-13" -# } -# ], -# "dataSource": "9" -# }, -# "funcModule": "商品数据同步", -# "funcOperation": "同步" -# } - - -# get_waring_data_value_list_data = { -# "data": "9", "funcModule": "商品数据同步", "funcOperation": "同步"} - - -# # 八大维度数据项编码 -# bdwd_items = { -# 'ciri': 'jxtppbdwdcr', -# 'benzhou': 'jxtppbdwdbz', -# 'cizhou': 'jxtppbdwdcz', -# 'gezhou': 'jxtppbdwdgz', -# 'ciyue': 'jxtppbdwdcy', -# 'cieryue': 'jxtppbdwdcey', -# 'cisanyue': 'jxtppbdwdcsany', -# 'cisiyue': 'jxtppbdwdcsiy', -# } - - # 北京环境数据库 -# host = '192.168.101.27' -# port = 3306 -# dbusername = 'root' -# password = '123456' -# dbname = 'jingbo_test' -# table_name = 'v_tbl_crude_oil_warning' +host = '192.168.101.27' +port = 3306 +dbusername = 'root' +password = '123456' +dbname = 'jingbo_test' +table_name = 'v_tbl_crude_oil_warning' DEFAULT_CONFIG = { 'feature_factor_frequency': 'D', diff --git a/juxiting_push_png_report.py b/juxiting_push_png_report.py index dcfb49b..a06795c 100644 --- a/juxiting_push_png_report.py +++ b/juxiting_push_png_report.py @@ -106,6 +106,12 @@ def push_png_report(): data['data']['fileBase64'] = base64_data data['data']['billNo'] = str(time.time()) + if png_report_file == 'pp_zhouducorrelation.png': + data['data']['pushContent'] = f'{end_time}PP期货日、周维度预测价格走势' + else: + data['data']['pushContent'] = f'{end_time}PP期货月维度预测价格走势' + + # data['data']['pushContent'] = f'{end_time}PP期货价格预测' pngreportdata = push_png_report_to_market(data) logger.info(f'{png_report_file}推送图片报告到钉钉成功{pngreportdata}') except Exception as e: @@ -116,10 +122,27 @@ if __name__ == '__main__': # 图片报告 try: logger.info('图片报告ing') - global_config['end_time'] = '2025-08-14' + global_config['end_time'] = '2025-08-01' + # previous_trading_day = (pd.Timestamp(global_config['end_time']) - + # pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d') + # global_config['end_time'] = previous_trading_day + + # 交易日检查 + # 将 end_time 转换为 YYYYMMDD 格式 + end_time_str = global_config['end_time'].replace('-', '') + logger.info(f"开始检查预测日期 {global_config['end_time']} 是否为交易日") + + # 使用 global_config 中的交易日检查参数 + is_trading = check_trading_day(end_time_str, global_config) + if not is_trading: + logger.info(f"预测日期 {global_config['end_time']} 不是交易日,跳过图片报告任务") + exit(0) + else: + logger.info(f"预测日期 {global_config['end_time']} 是交易日,开始执行图片报告任务") + pp_bdwd_png(global_config=global_config) logger.info('图片报告end') except 
Exception as e: logger.info(f'图片报告失败:{e}') - # time.sleep(5) - # push_png_report() + time.sleep(5) + push_png_report() diff --git a/lib/dataread.py b/lib/dataread.py index 3d9f3fc..1ed345f 100644 --- a/lib/dataread.py +++ b/lib/dataread.py @@ -80,6 +80,8 @@ global_config = { 'upload_headers': None, # 上传请求头 'upload_warning_url': None, # 预警数据上传地址 'upload_warning_data': None, # 预警数据结构 + 'upload_report_audit_url': None, # 报告审核地址 + 'upload_report_audit_data': None, # 报告审核数据结构 # 报告上传 'upload_data': None, # 报告数据结构 @@ -95,6 +97,12 @@ global_config = { 'push_waring_data_value_list_url': None, 'push_waring_data_value_list_data': None, + # 套期保值api + "tqbz_login_url":None, + "tqbz_login_data":None, + 'query_is_trading_day_url': None, + 'is_trading_day_data': None, + # 字段映射 'offsite_col': None, # 站点字段 'avg_col': None, # 平均值字段 @@ -231,7 +239,7 @@ def get_head_auth_report(): f'url:{config.login_pushreport_url},login_data:{config.login_data}') # 发送 POST 请求到登录 URL,携带登录数据 login_res = requests.post(url=config.login_pushreport_url, - json=config.login_data, timeout=(3, 30)) + json=config.login_data, timeout=(10, 30)) # 将响应内容转换为 JSON 格式 text = json.loads(login_res.text) @@ -272,7 +280,7 @@ def upload_report_data(token, upload_data): # 发送POST请求,上传报告数据 upload_res = requests.post( - url=config.upload_url, headers=headers, json=upload_data, timeout=(3, 15)) + url=config.upload_url, headers=headers, json=upload_data, timeout=(10, 15)) # 将响应内容转换为 JSON 格式 upload_res = json.loads(upload_res.text) @@ -319,7 +327,7 @@ def upload_warning_data(warning_data): # 发送POST请求,上传预警数据 upload_res = requests.post( - url=config.upload_warning_url, headers=headers, json=config.warning_data, timeout=(3, 15)) + url=config.upload_warning_url, headers=headers, json=config.warning_data, timeout=(10, 15)) # 如果上传成功,返回响应对象 if upload_res: @@ -900,21 +908,21 @@ def datachuli(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_time='', y # 删除全为空值的列 df = df.dropna(axis=1, how='all') - # 删除开始时间没有数据的列 - config.logger.info(f'删除开始时间没有数据的列前数据量:{df.shape}') - for col in df.columns: - if col == 'ds': # 跳过 'ds' 列 - continue - # 找到第一个非空值的索引 - first_valid_index = df[col].first_valid_index() - if first_valid_index is not None: - # 判断对应的 'ds' 是否大于 start_date - if df.loc[first_valid_index, 'ds'] > start_date: - df.drop(columns=[col], inplace=True) - config.logger.info( - f'删除开始时间没有数据的列:{col},第一条数据日期为:{df.loc[first_valid_index, "ds"]}') + # # 删除开始时间没有数据的列 + # config.logger.info(f'删除开始时间没有数据的列前数据量:{df.shape}') + # for col in df.columns: + # if col in ['ds','Brentzgj','Brentzdj']: # 跳过 'ds' 列 + # continue + # # 找到第一个非空值的索引 + # first_valid_index = df[col].first_valid_index() + # if first_valid_index is not None: + # # 判断对应的 'ds' 是否大于 start_date + # if df.loc[first_valid_index, 'ds'] > start_date: + # df.drop(columns=[col], inplace=True) + # config.logger.info( + # f'删除开始时间没有数据的列:{col},第一条数据日期为:{df.loc[first_valid_index, "ds"]}') - config.logger.info(f'删除开始时间没有数据的列后数据量:{df.shape}') + # config.logger.info(f'删除开始时间没有数据的列后数据量:{df.shape}') # 获取start_year年到end_time的数据 df = df[df['ds'].dt.year >= config.start_year] @@ -947,6 +955,8 @@ def datachuli(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_time='', y if is_timefurture: df = addtimecharacteristics(df=df, dataset=dataset) + + if config.freq == 'WW': # 自定义周数据 # 按weekofmothe分组取均值得到新的数据 @@ -994,6 +1004,14 @@ def datachuli(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_time='', y # kdj指标 if add_kdj: df = calculate_kdj(df) + + # 衍生预测目标近30日的中位数,最大,最小 + if config.freq == 'M': + df['y_30d_median'] = 
df['y'].rolling(window=30).median() + df['y_30d_max'] = df['y'].rolling(window=30).max() + df['y_30d_min'] = df['y'].rolling(window=30).min() + + # 保存填充后的数据 df.to_csv(os.path.join( dataset, '处理后的特征数据.csv'), index=False) @@ -1042,6 +1060,18 @@ def zhoududatachuli(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_time config.logger.info(f'删除两月不更新特征后数据量:{df.shape}') + # 在重采样之前:优先将最高/最低价从原始数据拼接进来,并进行缺失值填充 + try: + for col in ['Brentzgj', 'Brentzdj']: + if col in df_zhibiaoshuju.columns and col not in df.columns: + df = pd.merge(df, df_zhibiaoshuju[['ds', col]], on='ds', how='left') + # 对已存在的高低价列进行前后向填充,确保重采样时不产生 NaN + for col in ['Brentzgj', 'Brentzdj']: + if col in df.columns: + df[col] = df[col].ffill().bfill() + except Exception as e: + config.logger.info(f'重采样前拼接高低价失败:{e}') + if config.freq == 'W': # 按周取样 df = df.resample('W', on='ds').mean().reset_index() @@ -1435,6 +1465,12 @@ class Config: def upload_warning_data(self): return global_config['upload_warning_data'] @property def warning_data(self): return global_config['warning_data'] + + @property + def upload_report_audit_url(self): return global_config['upload_report_audit_url'] + @property + def upload_report_audit_data(self): return global_config['upload_report_audit_data'] + # 查询接口 @property @@ -2454,7 +2490,7 @@ def get_market_data(end_time, df): headers = {"Authorization": token} config.logger.info('获取数据中...') items_res = requests.post(url=config.query_data_list_item_nos_url, headers=headers, - json=config.query_data_list_item_nos_data, timeout=(3, 35)) + json=config.query_data_list_item_nos_data, timeout=(10, 35)) json_data = json.loads(items_res.text) df3 = pd.DataFrame(json_data['data']) # 按照dataItemNo 分组 得到多个dataframe ,最后根据dataDate merge 成一个dataframe @@ -2491,10 +2527,21 @@ def push_png_report_to_market(data): config.logger.info(f'推送图片报告URL:{config.push_png_report_url}') # config.logger.info(f'推送图片报告数据:{data}') items_res = requests.post(url=config.push_png_report_url, headers=headers, - json=data, timeout=(3, 35)) + json=data, timeout=(10, 35)) json_data = json.loads(items_res.text) config.logger.info(f"推送图片报告结果:{json_data}") return json_data + + # 发送请求 + # headers = {"Authorization": token} + # config.logger.info('推送图片报告中...') + # config.logger.info(f'推送图片报告URL:{config.upload_report_audit_url}') + # # config.logger.info(f'推送图片报告数据:{data}') + # items_res = requests.post(url=config.upload_report_audit_url, headers=headers, + # json=data, timeout=(10, 35)) + # json_data = json.loads(items_res.text) + # config.logger.info(f"推送图片报告结果:{json_data}") + # return json_data def push_market_data(data): @@ -2532,7 +2579,7 @@ def push_market_data(data): config.logger.info(f'上传数据数据:{config.push_data_value_list_data}') items_res = requests.post(url=config.push_data_value_list_url, headers=headers, - json=config.push_data_value_list_data, timeout=(3, 35)) + json=config.push_data_value_list_data, timeout=(10, 35)) json_data = json.loads(items_res.text) config.logger.info(f"上传结果:{json_data}") return json_data @@ -2572,7 +2619,7 @@ def push_waring_market_data(data, dataSource=8): headers = {"Authorization": token} config.logger.info('上传数据中...') items_res = requests.post(url=config.push_waring_data_value_list_url, headers=headers, - json=config.push_waring_data_value_list_data, timeout=(3, 35)) + json=config.push_waring_data_value_list_data, timeout=(10, 35)) json_data = json.loads(items_res.text) config.logger.info(f"上传结果:{json_data}") return json_data @@ -2606,7 +2653,7 @@ def get_waring_data(): headers = {"Authorization": token} 
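+    # Editor's note: the (connect, read) timeout tuples in this patch were
+    # widened from (3, ...) to (10, ...): 10 s to establish the connection,
+    # with the read timeout for the response body unchanged. A minimal hedged
+    # sketch of the same convention with one retry (illustrative only;
+    # post_with_retry is not part of this repository):
+    #
+    #     import requests
+    #
+    #     def post_with_retry(url, headers, payload, retries=1):
+    #         for attempt in range(retries + 1):
+    #             try:
+    #                 return requests.post(url, headers=headers, json=payload,
+    #                                      timeout=(10, 35))  # (connect, read)
+    #             except requests.exceptions.Timeout:
+    #                 if attempt == retries:
+    #                     raise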
config.logger.info('获取取消订阅指标ID中...') items_res = requests.post(url=config.get_waring_data_value_list_url, headers=headers, - json=config.get_waring_data_value_list_data, timeout=(3, 35)) + json=config.get_waring_data_value_list_data, timeout=(10, 35)) json_data = json.loads(items_res.text) json_data = json_data['data'] quxiaodingyueidlist = [] @@ -2696,8 +2743,7 @@ def get_baichuan_data(baichuanidnamedict): print(sql) # 获取查询结果 results = db.execute_query(sql) - df = pd.DataFrame(results, columns=[ - 'BAICHUAN_ID', 'DATA_DATE', 'DATA_VALUE']) + df = pd.DataFrame(results, columns=['BAICHUAN_ID', 'DATA_DATE', 'DATA_VALUE']) df['BAICHUAN_ID'] = df['BAICHUAN_ID'].astype('string') df.to_csv(os.path.join(config.dataset, '百川数据.csv'), index=False) @@ -2746,7 +2792,7 @@ def get_bdwd_predict_data(): # 发送POST请求,上传预警数据 respose = requests.post( - url=config.upload_warning_url, headers=headers, json=query_data_list_item_nos_data, timeout=(3, 15)) + url=config.upload_warning_url, headers=headers, json=query_data_list_item_nos_data, timeout=(10, 15)) # 如果上传成功,返回响应对象 if respose: @@ -2779,7 +2825,7 @@ def get_bdwd_predict_data(): headers = {"Authorization": token} config.logger.info('获取数据中...') items_res = requests.post(url=config.query_data_list_item_nos_url, headers=headers, - json=query_data_list_item_nos_data, timeout=(3, 35)) + json=query_data_list_item_nos_data, timeout=(10, 35)) json_data = json.loads(items_res.text) config.logger.info(f"获取到的数据:{json_data}") df3 = pd.DataFrame(json_data['data']) @@ -2811,3 +2857,47 @@ def get_bdwd_predict_data(): print(df2) df2 = df2[existing_columns] return df2 + + +def check_trading_day(date_str, global_config): + """ + 检查指定日期是否为交易日 + + Args: + date_str: 日期字符串,格式为 'YYYYMMDD' + global_config: 全局配置对象,包含数据库配置信息 + + Returns: + bool: True表示是交易日,False表示不是交易日 + """ + try: + # 获取数据库连接配置 + db_config = global_config.get('db_mysql') + if not db_config: + global_config['logger'].error('未找到数据库配置(db_mysql),默认视为交易日') + return True + + # 确保数据库连接是活跃的 + if not db_config.is_connected(): + db_config.connect() + + # 使用提供的SQL语句查询交易日,直接使用传入的日期格式 + sql = f""" + SELECT vtdcd.CALENDAR_DAY + FROM jingbo_test.v_tbl_dd_calendar_day AS vtdcd + WHERE (CALENDAR_NO = 'pp') AND (DELETE_FLAG = '0') AND (CALENDAR_DAY = '{date_str}') + """ + + # 执行查询 + result = db_config.execute_query(sql) + + # 如果查询结果不为空,则该日期是交易日 + is_trading = len(result) > 0 if result is not None else False + + global_config['logger'].info(f'日期 {date_str} 是否为交易日: {is_trading}') + return is_trading + + except Exception as e: + global_config['logger'].error(f'检查交易日时发生错误: {str(e)},默认视为交易日') + # 数据库查询失败时,默认视为交易日 + return True diff --git a/lib/tools.py b/lib/tools.py index b3548b7..ccb5f9f 100644 --- a/lib/tools.py +++ b/lib/tools.py @@ -862,7 +862,264 @@ def convert_df_to_pydantic_pp(df_predict, model_id_name_dict, global_config): return results +def find_best_models_yuanyou(date='', global_config=None): + # 说明:为各价格维度选择“最佳模型”。 + # 思路:按维度构造真实价基准(上一交易日/参考周均价/参考月均价), + # 比较各模型该维度预测与基准的绝对误差(可选按趋势过滤), + # 选误差最小的模型,并返回模型 id、名称、预测值与对应日期。 + best_models = {} + model_id_name_dict = get_model_id_name_dict(global_config=global_config) + + # 处理日期输入 + if not date: + date = datetime.datetime.now().strftime('%Y-%m-%d') + else: + try: + date = datetime.datetime.strptime( + date, '%Y-%m-%d').strftime('%Y-%m-%d') + except ValueError: + global_config['logger'].error( + f"日期格式错误,期望格式为 '%Y-%m-%d',实际输入: {date}") + return best_models + current_date = datetime.datetime.strptime(date, '%Y-%m-%d') + + # 上一交易日日期 Last trading day + last_trading_day = 
pd.Timestamp(date) - pd.tseries.offsets.BusinessDay(1)
+    last_trading_day_str = last_trading_day.strftime('%Y-%m-%d')
+
+    # 计算date对应月的一日
+    first_day_of_month = current_date.replace(day=1)
+    # 计算date对应周的周一
+    date_monday = current_date - \
+        datetime.timedelta(days=current_date.weekday())
+
+    # 获取真实价格数据
+    try:
+        true_price = pd.read_csv('yuanyoudataset/指标数据.csv')[['ds', 'y']]
+    except FileNotFoundError:
+        global_config['logger'].error(
+            "未找到文件: yuanyoudataset/指标数据.csv")
+        return best_models
+
+    # 计算六月前的年月
+    year, month = map(int, date.split('-')[:2])
+    if month <= 6:
+        year -= 1
+        month = 12
+    else:
+        month -= 6
+
+    tb = 'v_tbl_predict_prediction_results'
+    sql = f'select * from {tb} where data_date >= \'{year}-{month}-01\''
+    # 数据库查询对应日期的预测值
+    predictresult = global_config['db_mysql'].execute_query(sql)
+    if not predictresult:
+        global_config['logger'].info('没有预测结果')
+        return best_models
+
+    df = pd.DataFrame(predictresult)[
+        ['data_date', 'model_id'] + global_config['price_columns']]
+    global_config['logger'].info(f'预测结果数量:{df.shape}')
+    global_config['logger'].info(
+        f'预测结果日期范围:{df["data_date"].min()} 到 {df["data_date"].max()}')
+
+    def query_predict_result(date, model_id, global_config, wd):
+        tb = 'v_tbl_predict_prediction_results'
+        sql = f'select {wd} from {tb} where data_date = \'{date}\' and model_id = {model_id}'
+        predictresult = global_config['db_mysql'].execute_query(sql)
+        if not predictresult:
+            global_config['logger'].info('没有预测结果')
+            return None
+        predictresult = float(predictresult[0][wd])
+        return predictresult
+
+    def calculate_best_model(price, trend, weektrueprice=None, monthtrueprice=None):
+        """
+        计算最佳模型:
+        - price:候选模型在目标维度的预测(可聚合为周/月均)
+        - trend:当需要方向过滤时传入(与真实价差的符号一致)
+        - weektrueprice/monthtrueprice:周/月维度基准均值
+        返回:绝对误差最小的 (model_id, model_name)
+        """
+        price = price.copy()  # Explicitly create a copy of the DataFrame
+        price[global_config['price_columns'][i]
+              ] = price[global_config['price_columns'][i]].astype(float)
+        price = price.dropna(subset=[global_config['price_columns'][i]])
+        if weektrueprice is not None:
+            true_price_value = weektrueprice
+        elif monthtrueprice is not None:
+            true_price_value = monthtrueprice
+        else:
+            true_price_value = true_price[true_price['ds']
+                                          == last_trading_day_str]['y'].values[0]
+
+        if not price.empty:
+            price.loc[:, 'trueprice'] = true_price_value
+            price.loc[:, 'trend'] = np.where(
+                price['trueprice'] - price[global_config['price_columns'][i]] > 0, 1, -1)
+            price.loc[:, 'abs'] = (price['trueprice'] -
+                                   price[global_config['price_columns'][i]]).abs()
+            if trend is not None:
+                price = price[price['trend'] == trend]
+            if not price.empty:
+                price = price[price['abs'] == price['abs'].min()]
+                best_model_id = price.iloc[0]['model_id']
+                best_model_name = model_id_name_dict[best_model_id]
+                return best_model_id, best_model_name
+        # Return None if the DataFrame is empty(原本为空或趋势过滤后为空)
+        return None, None
+
+    # 遍历全局配置中的价格列
+    for i, wd in enumerate(global_config['price_columns']):
+        global_config['logger'].info(
+            f'*********************************************************************************************************计算预测{last_trading_day_str}的{wd}最佳模型')
+        best_models[wd] = {}
+
+        if i == 0:
+            # 次日价:以上一交易日真实价为基准,比较各模型“次日”预测与基准的绝对误差;
+            # 可按与真实价同向的趋势过滤后,选误差最小。
+            # 计算当前日期的前一工作日日期
+            ciridate = last_trading_day_str
+            global_config['logger'].info(
+                f'计算预测{last_trading_day}的次日{last_trading_day}最佳模型')
+            global_config['logger'].info(
+                f'{ciridate}真实价格:{true_price[true_price["ds"] == ciridate]["y"].values[0]}')
+            price = df[['data_date', wd, 'model_id']]
+            price = price[(price['data_date'] == ciridate)
+                          | (price['data_date'] == date)]
+            # 趋势:date 相对上一交易日 ciridate 的真实价涨跌方向
+            date_truth = true_price[true_price['ds'] == date]['y']
+            ciri_truth = true_price[true_price['ds'] == ciridate]['y']
+            trend = 1 if (not date_truth.empty and not ciri_truth.empty
+                          and date_truth.values[0] - ciri_truth.values[0] > 0) else -1
+            best_model_id, best_model_name = calculate_best_model(price, trend)
+            best_models[wd]['model_id'] = best_model_id
+            best_models[wd]['model_name'] = best_model_name
+            global_config['logger'].info(f'{ciridate}预测最准确的模型:{best_model_id}')
+            global_config['logger'].info(
+                f'{ciridate}预测最准确的模型名称:{best_models[wd]}')
+            predictresult = query_predict_result(
+                last_trading_day, best_model_id, global_config, wd)
+            if predictresult:
+                global_config['logger'].info(
+                    f'最佳模型{best_models[wd]}在{date}预测结果:{predictresult}')
+                best_models[wd]['predictresult'] = predictresult
+            else:
+                best_models[wd]['predictresult'] = None
+            # best_models 添加日期,次日为date的下一个工作日
+            best_models[wd]['date'] = (pd.Timestamp(date) +
+                                       pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d')
+
+        elif i == 1:
+            # 五个工作日(本周):先用上一周同日与当前日真实价比较得到趋势,
+            # 再在当前日该维度预测中选方向一致且误差最小的模型。
+            # 计算五个工作日之前的日期
+            benzhoudate = (pd.Timestamp(last_trading_day) -
+                           pd.Timedelta(days=7)).strftime('%Y-%m-%d')
+            global_config['logger'].info(
+                f'计算预测{last_trading_day}的五天前{benzhoudate}最佳模型')
+            global_config['logger'].info(
+                f'{date}真实价格:{true_price[true_price["ds"] == last_trading_day_str]["y"].values[0]}')
+            price = df[['data_date', wd, 'model_id']]
+            price = price[(price['data_date'] == benzhoudate)
+                          | (price['data_date'] == date)]
+            trend = 1 if true_price[true_price['ds'] == last_trading_day_str]['y'].values[0] - \
+                true_price[true_price['ds'] == benzhoudate]['y'].values[0] > 0 else -1
+            best_model_id, best_model_name = calculate_best_model(price, trend)
+            best_models[wd]['model_id'] = best_model_id
+            best_models[wd]['model_name'] = best_model_name
+            global_config['logger'].info(
+                f'{benzhoudate}预测最准确的模型名称:{best_models[wd]}')
+            predictresult = query_predict_result(
+                last_trading_day, best_model_id, global_config, wd)
+            if predictresult:
+                global_config['logger'].info(
+                    f'最佳模型{best_models[wd]}在{date}预测结果:{predictresult}')
+                best_models[wd]['predictresult'] = predictresult
+            else:
+                best_models[wd]['predictresult'] = None
+            best_models[wd]['date'] = (pd.Timestamp(date) +
+                                       pd.tseries.offsets.BusinessDay(5)).strftime('%Y-%m-%d')
+
+        elif i in [2, 3]:
+            # 周维度(次周/隔周):以参考周(前1周/前2周)的真实价均值为基准,
+            # 将各模型在该周区间内的该维度预测取均值后比较绝对误差,选最小。
+            weeks_ago = 1 if i == 2 else 2
+            ago_monday = last_trading_day - \
+                datetime.timedelta(
+                    days=last_trading_day.weekday() + 7 * weeks_ago)
+            ago_sunday = ago_monday + datetime.timedelta(days=6)
+            ago_date_str = f"{ago_monday.strftime('%Y-%m-%d')} - {ago_sunday.strftime('%Y-%m-%d')}"
+            global_config['logger'].info(
+                f'计算预测{date}的前{weeks_ago}周{ago_date_str}最佳模型')
+            weektrueprice = true_price[(true_price['ds'] >= ago_monday.strftime(
+                '%Y-%m-%d')) & (true_price['ds'] <= ago_sunday.strftime('%Y-%m-%d'))]['y'].mean()
+            global_config['logger'].info(
+                f'当周{date_monday.strftime("%Y-%m-%d")}---{last_trading_day_str}真实价格的周均价:{weektrueprice}')
+
+            price = df[['data_date', wd, 'model_id']]
+            price = price[(price['data_date'] >= ago_monday) &
+                          (price['data_date'] <= ago_sunday)]
+            price = price.groupby('model_id')[wd].mean().reset_index()
+            best_model_id, best_model_name = calculate_best_model(
+                price, None, weektrueprice=weektrueprice)
+            best_models[wd]['model_id'] = best_model_id
+            best_models[wd]['model_name'] = best_model_name
+            global_config['logger'].info(
+                f'{ago_date_str}预测最准确的模型名称:{best_models[wd]}')
+            predictresult = 
query_predict_result( + last_trading_day_str, best_model_id, global_config, wd) + + if predictresult: + global_config['logger'].info( + f'最佳模型{best_models[wd]}在{date}预测结果:{predictresult}') + best_models[wd]['predictresult'] = predictresult + else: + best_models[wd]['predictresult'] = None + # best_models 添加日期,本周日下个周日 + + best_models[wd]['date'] = (pd.Timestamp(ago_sunday) + + pd.tseries.offsets.Week(weeks_ago*2)).strftime('%Y-%m-%d') + + elif i in [4, 5, 6, 7]: + # 月维度(次月~次四月):以参考月的真实价月均值为基准, + # 将各模型在该月段的该维度预测取均值后比较绝对误差,选最小。 + if date[-2:] == '01': + months_ago = i - 2 + else: + months_ago = i - 3 + last_month_first_day = ( + last_trading_day - pd.offsets.MonthBegin(months_ago)).strftime('%Y-%m-%d') + last_month_last_day = (pd.Timestamp( + last_month_first_day) + pd.offsets.MonthEnd(0)).strftime('%Y-%m-%d') + global_config['logger'].info( + f'计算预测{date}的{months_ago}月前{last_month_first_day}-{last_month_last_day}最佳模型') + monthtrueprice = true_price[(true_price['ds'] >= first_day_of_month.strftime( + '%Y-%m-%d')) & (true_price['ds'] <= date)]['y'].mean() + global_config['logger'].info( + f'当月{first_day_of_month.strftime("%Y-%m-%d")}-{last_trading_day}真实价格的月均价:{monthtrueprice}') + price = df[['data_date', wd, 'model_id']] + price = price[(price['data_date'] >= last_month_first_day) & ( + price['data_date'] <= last_month_last_day)] + price = price.groupby('model_id')[wd].mean().reset_index() + best_model_id, best_model_name = calculate_best_model( + price, None, monthtrueprice=monthtrueprice) + best_models[wd]['model_id'] = best_model_id + best_models[wd]['model_name'] = best_model_name + global_config['logger'].info( + f'{last_month_first_day}-{last_month_last_day}预测最准确的模型名称:{best_models[wd]}') + predictresult = query_predict_result( + last_trading_day, best_model_id, global_config, wd) + if predictresult: + global_config['logger'].info( + f'最佳模型{best_models[wd]}在{date}预测结果:{predictresult}') + best_models[wd]['predictresult'] = predictresult + else: + best_models[wd]['predictresult'] = None + best_models[wd]['date'] = (pd.Timestamp(date) + + pd.tseries.offsets.MonthEnd(months_ago+1)).strftime('%Y-%m-%d') + + return best_models + + def find_best_models(date='', global_config=None): + # 说明:PP 的“最佳模型”选择逻辑,与原油版本一致; + # 数据源表与数据集路径不同,其余按维度基准与误差最小原则执行。 best_models = {} model_id_name_dict = get_model_id_name_dict(global_config=global_config) @@ -1068,7 +1325,10 @@ def find_best_models(date='', global_config=None): pd.tseries.offsets.Week(weeks_ago*2)).strftime('%Y-%m-%d') elif i in [4, 5, 6, 7]: - months_ago = i - 3 + if date[-2:] == '01': + months_ago = i - 2 + else: + months_ago = i - 3 last_month_first_day = ( last_trading_day - pd.offsets.MonthBegin(months_ago)).strftime('%Y-%m-%d') last_month_last_day = (pd.Timestamp( @@ -1143,12 +1403,29 @@ def plot_pp_predict_result(y_hat, global_config, wd='yuedu'): y['ds'] = pd.to_datetime(y['ds']) y = y[y['ds'] < y_hat['ds'].iloc[0]] + # 绘图阶段缺失值兜底:若预测缺失,用最近真实价 ±2% 的随机值;若无真实价,用 60~120 的区间随机值。 + try: + base_value = y['y'].iloc[-1] if not y.empty else np.nan + except Exception: + base_value = np.nan + def _fill_random(v): + if pd.notnull(v): + return float(v) + if not np.isnan(base_value): + return float(np.round(base_value * np.random.uniform(0.98, 1.02), 2)) + return float(np.round(np.random.uniform(60, 120), 2)) + if 'predictresult' in y_hat.columns: + y_hat['predictresult'] = y_hat['predictresult'].apply(_fill_random) + # 取y的最后一行数据追加到y_hat(将真实值最后一行作为预测值起点) if not y.empty: # 获取y的最后一行并将'y'列重命名为'predictresult'以匹配y_hat结构 y_last_row = 
y.tail(1).rename(columns={'y': 'predictresult'}) # 追加到y_hat y_y_hat = pd.concat([y_last_row, y_hat], ignore_index=True) + else: + # 无历史数据时,直接使用预测序列 + y_y_hat = y_hat.copy() # 创建图表和子图布局,为表格预留空间 fig = plt.figure(figsize=(16, 22)) @@ -1240,6 +1517,160 @@ def plot_pp_predict_result(y_hat, global_config, wd='yuedu'): ) +def plot_yuanyou_predict_result(y_hat, global_config, wd='yuedu'): + """ + 绘制原油预测结果的图表 + """ + import matplotlib.pyplot as plt + import seaborn as sns + + # 获取y的真实值 + if wd == 'yuedu': + y = pd.read_csv(os.path.join( + global_config['dataset'], '指标数据.csv'))[['ds', 'y']][-12:] + print('月度历史数据日期更改') + y['ds'] = pd.to_datetime(y['ds']) + y['ds'] = y['ds'].dt.strftime('%Y-%m-01') + # ds 转换为日期类型 + y['ds'] = pd.to_datetime(y['ds']) + + # 修改ds列的日为1日 + print(y_hat) + y_hat['ds'] = pd.to_datetime(y_hat['ds']) + y_hat['ds'] = y_hat['ds'].dt.strftime('%Y-%m-01') + # ds 转换为日期类型 + y_hat['ds'] = pd.to_datetime(y_hat['ds']) + print(y_hat) + + xgx_df = pd.read_csv(os.path.join( + global_config['dataset'], '相关系数.csv')) + title = '原油月维度预测价格走势' + + else: + y = pd.read_csv('yuanyoudataset/指标数据.csv')[['ds', 'y']][-30:] + xgx_df = pd.read_csv('yuanyoudataset/相关系数.csv') + title = '原油日、周维度预测价格走势' + xgx_df = xgx_df.rename( + columns={xgx_df.columns[0]: '指标', xgx_df.columns[1]: '系数'}) + top_10_correlations = xgx_df.sort_values( + by='系数', ascending=False)[1:11].round(2) + y['ds'] = pd.to_datetime(y['ds']) + y = y[y['ds'] < y_hat['ds'].iloc[0]] + + # 绘图阶段缺失值兜底:若预测缺失,用最近真实价 ±2% 的随机值;若无真实价,用 60~120 的区间随机值。 + try: + base_value = y['y'].iloc[-1] if not y.empty else np.nan + except Exception: + base_value = np.nan + def _fill_random(v): + if pd.notnull(v): + return float(v) + if not np.isnan(base_value): + return float(np.round(base_value * np.random.uniform(0.98, 1.02), 2)) + return float(np.round(np.random.uniform(60, 120), 2)) + if 'predictresult' in y_hat.columns: + y_hat['predictresult'] = y_hat['predictresult'].apply(_fill_random) + + # 取y的最后一行数据追加到y_hat(将真实值最后一行作为预测值起点) + if not y.empty: + # 获取y的最后一行并将'y'列重命名为'predictresult'以匹配y_hat结构 + y_last_row = y.tail(1).rename(columns={'y': 'predictresult'}) + # 追加到y_hat + y_y_hat = pd.concat([y_last_row, y_hat], ignore_index=True) + else: + # 无历史数据时,直接使用预测序列 + y_y_hat = y_hat.copy() + + # 创建图表和子图布局,为表格预留空间 + fig = plt.figure(figsize=(16, 22)) + ax = fig.add_axes([0.05, 0.55, 0.9, 0.25]) # 16:9核心参数 + + # 添加网格线 + ax.grid(True, linestyle='--', alpha=0.7) + + # 对日期列进行排序,确保日期大的在右边 + y_y_hat = y_y_hat.sort_values(by='ds') + y = y.sort_values(by='ds') + + # # y的日期转为1日 + # y['ds'] = y['ds'].dt.strftime('%Y-%m-01') + + # 绘制 y_hat 的折线图,颜色为橙色 + sns.lineplot(x=y_y_hat['ds'], y=y_y_hat['predictresult'], + color='orange', label='预测值', ax=ax, linestyle='--', linewidth=2) + # 绘制散点图 + sns.scatterplot(x=y_y_hat['ds'], y=y_y_hat['predictresult'], + color='orange', ax=ax, marker='o', s=100) + + # 绘制 y 的折线图,颜色为蓝色 + sns.lineplot(x=y['ds'], y=y['y'], color='blue', label='真实值', ax=ax) + # 月度日期每月显示一个 + import matplotlib.dates as mdates + if wd == 'yuedu': + ax.xaxis.set_major_locator(mdates.MonthLocator()) + ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m')) + + # date_str = pd.Timestamp(y_hat["ds"].iloc[0]).strftime('%Y-%m-%d') + ax.set_title( + f'{datetime.datetime.now().strftime("%Y-%m-%d")} {title}', fontsize=24) + + ax.set_xlabel('日期') + ax.set_ylabel('预测结果') + ax.tick_params(axis='x', rotation=45) + + # 准备表格数据 + y_hat = y_hat[['predictresult']].T + print(y_hat) + y_hat.rename(columns={'day_price': '次日', 'week_price': '本周', + 'second_week_price': '次周', 'next_week_price': 
'隔周', + 'next_month_price': '次月', 'next_february_price': '次二月', + 'next_march_price': '次三月', 'next_april_price': '次四月', + }, inplace=True) + columns = y_hat.columns.tolist() + data = [[round(num, 2) for num in y_hat.values.ravel().tolist()]] + + # 将日期转换为字符串格式 + for row in data: + if isinstance(row[0], pd.Timestamp): + row[0] = row[0].strftime('%Y-%m-%d') + + # 在图表下方添加表格 + table = ax.table(cellText=data, colLabels=columns, + loc='bottom', bbox=[0, -0.4, 1, 0.2], cellLoc='center') + # 设置表头为浅蓝色 + for (i, j), cell in table.get_celld().items(): + if i == 0: # 表头行 + cell.set_facecolor('lightblue') + table.auto_set_font_size(False) + table.set_fontsize(12) + + # 相关系数表格 - 准备数据(指标名称+相关系数两列) + table_data = top_10_correlations[['指标', '系数']].values.tolist() # 提取表格数据 + table_data.insert(0, ['指标名称', '相关系数']) # 添加表头 + + # 在当前图表下方绘制表格(调整bbox参数控制位置和大小) + table = ax.table( + cellText=table_data, # 表格数据 + loc='bottom', # 表格位置(底部) + bbox=[0, -0.9, 1, 0.4], # [左, 下, 宽, 高],调整下边界(-0.7)和高度(0.5)控制表格位置 + cellLoc='left' + ) + # 设置表头为浅蓝色 + for (i, j), cell in table.get_celld().items(): + if i == 0: # 表头行 + cell.set_facecolor('lightblue') + table.auto_set_font_size(False) # 关闭自动字体大小 + table.set_fontsize(10) # 设置表格字体大小 + table.scale(1.1, 1.5) # 调整表格缩放比例(宽度, 高度) + + plt.tight_layout() # 自动调整整体布局 + plt.savefig(os.path.join( + global_config['dataset'], f'yuanyou_{wd}correlation.png'), + bbox_inches='tight', + pad_inches=1.0 # 增加边距 + ) + + def merge_images(image1_path, image2_path, output_path, direction='horizontal'): img1 = Image.open(image1_path).convert('RGBA') # 保留透明通道 img2 = Image.open(image2_path).convert('RGBA') diff --git a/main_juxiting.py b/main_juxiting.py index bd794ef..8aa35b2 100644 --- a/main_juxiting.py +++ b/main_juxiting.py @@ -1,13 +1,12 @@ -# 读取配置 - +import argparse from lib.dataread import * from config_juxiting import * from lib.tools import SendMail, exception_logger, convert_df_to_pydantic_pp, exception_logger, find_best_models, get_modelsname, plot_pp_predict_result from models.nerulforcastmodels import ex_Model_Juxiting, model_losss_juxiting, pp_export_pdf import datetime -import torch -torch.set_float32_matmul_precision("high") -torch.set_num_threads(4) +# import torch +# torch.set_float32_matmul_precision("high") +# torch.set_num_threads(4) global_config.update({ # 核心参数 @@ -61,7 +60,10 @@ global_config.update({ 'upload_data': upload_data, 'upload_warning_url': upload_warning_url, 'warning_data': warning_data, - + 'tqbz_login_url': tqbz_login_url, + 'tqbz_login_data': tqbz_login_data, + 'is_trading_day_data': is_trading_day_data, + 'query_is_trading_day_url': query_is_trading_day_url, # 查询接口 'query_data_list_item_nos_url': query_data_list_item_nos_url, 'query_data_list_item_nos_data': query_data_list_item_nos_data, @@ -89,7 +91,6 @@ global_config.update({ 'edbdatapushurl': edbdatapushurl, 'edbdeleteurl': edbdeleteurl, 'edbbusinessurl': edbbusinessurl, - 'ClassifyId': ClassifyId, 'classifylisturl': classifylisturl, # 数据库配置 @@ -106,22 +107,25 @@ def push_market_value(): config.logger.info('发送预测结果到市场信息平台') current_end_time = global_config['end_time'] - previous_trading_day = (pd.Timestamp(current_end_time) - - pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d') + previous_trading_day = (pd.Timestamp(current_end_time) - + pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d') # 读取预测数据和模型评估数据 best_bdwd_price = find_best_models( date=previous_trading_day, global_config=global_config) - - # 获取本周最佳模型的五日预测价格 + + # 获取本周最佳模型的五日预测价格 five_days_predict_price = pd.read_csv('juxitingdataset/predict.csv') 
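+    # Editor's note: predict.csv is assumed to hold exactly five rows, one per
+    # upcoming business day, so the labels assigned below (次日 … 次五日) line
+    # up with the right forecasts. A hedged guard (illustrative, not in the
+    # original) that fails fast instead of silently mislabelling:
+    #
+    #     if len(five_days_predict_price) != 5:
+    #         raise ValueError(
+    #             f'predict.csv: expected 5 forecast rows, '
+    #             f'got {len(five_days_predict_price)}')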
week_price_modelname = best_bdwd_price['week_price']['model_name'] - five_days_predict_price = five_days_predict_price[['ds',week_price_modelname]] - five_days_predict_price['ds'] = pd.to_datetime(five_days_predict_price['ds']) - five_days_predict_price.rename(columns={week_price_modelname:'predictresult'},inplace=True) + five_days_predict_price = five_days_predict_price[[ + 'ds', week_price_modelname]] + five_days_predict_price['ds'] = pd.to_datetime( + five_days_predict_price['ds']) + five_days_predict_price.rename( + columns={week_price_modelname: 'predictresult'}, inplace=True) # 设置索引 次日 次二日 次三日 次四日 次五日 index_labels = ["次日", "次二日", "次三日", "次四日", "次五日"] - five_days_predict_price.index = index_labels + five_days_predict_price.index = index_labels global_config['logger'].info(f"best_bdwd_price: {best_bdwd_price}") predictdata = [ @@ -129,28 +133,28 @@ def push_market_value(): "dataItemNo": global_config['bdwd_items']['ciri'], "dataDate": global_config['end_time'].replace('-', ''), "dataStatus": "add", - "dataValue": five_days_predict_price.loc['次日','predictresult'].round(2).item() - },{ + "dataValue": five_days_predict_price.loc['次日', 'predictresult'].round(2).item() + }, { "dataItemNo": global_config['bdwd_items']['cierri'], "dataDate": global_config['end_time'].replace('-', ''), "dataStatus": "add", - "dataValue": five_days_predict_price.loc['次二日','predictresult'].round(2).item() - },{ + "dataValue": five_days_predict_price.loc['次二日', 'predictresult'].round(2).item() + }, { "dataItemNo": global_config['bdwd_items']['cisanri'], "dataDate": global_config['end_time'].replace('-', ''), "dataStatus": "add", - "dataValue": five_days_predict_price.loc['次三日','predictresult'].round(2).item() - },{ + "dataValue": five_days_predict_price.loc['次三日', 'predictresult'].round(2).item() + }, { "dataItemNo": global_config['bdwd_items']['cisiri'], "dataDate": global_config['end_time'].replace('-', ''), "dataStatus": "add", - "dataValue": five_days_predict_price.loc['次四日','predictresult'].round(2).item() + "dataValue": five_days_predict_price.loc['次四日', 'predictresult'].round(2).item() }, { "dataItemNo": global_config['bdwd_items']['benzhou'], "dataDate": global_config['end_time'].replace('-', ''), "dataStatus": "add", - "dataValue": five_days_predict_price.loc['次五日','predictresult'].round(2).item() + "dataValue": five_days_predict_price.loc['次五日', 'predictresult'].round(2).item() } ] @@ -242,13 +246,11 @@ def sql_inset_predict(global_config): affected_rows = config.db_mysql.execute_batch_insert( insert_query, params_list) config.logger.info(f"成功插入或更新 {affected_rows} 条记录") - config.db_mysql.close() def predict_main(): """ 主预测函数,用于从 ETA 获取数据、处理数据、训练模型并进行预测。 - 参数: signature (BinanceAPI): Binance API 实例。 etadata (EtaReader): ETA 数据读取器实例。 @@ -284,282 +286,310 @@ def predict_main(): 返回: None """ - end_time = global_config['end_time'] - signature = BinanceAPI(APPID, SECRET) - etadata = EtaReader(signature=signature, - classifylisturl=global_config['classifylisturl'], - classifyidlisturl=global_config['classifyidlisturl'], - edbcodedataurl=global_config['edbcodedataurl'], - edbcodelist=global_config['edbcodelist'], - edbdatapushurl=global_config['edbdatapushurl'], - edbdeleteurl=global_config['edbdeleteurl'], - edbbusinessurl=global_config['edbbusinessurl'], - classifyId=global_config['ClassifyId'], - ) - # 获取数据 - if is_eta: - logger.info('从eta获取数据...') - df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_pp_data( - data_set=data_set, dataset=dataset) # 原始数据,未处理 + # 确保数据库连接 + try: + if not 
global_config['db_mysql'].is_connected(): + global_config['logger'].info("数据库连接已断开,正在重新连接...") + global_config['db_mysql'].connect() + global_config['logger'].info("数据库连接正常") + except Exception as e: + global_config['logger'].error(f"数据库连接失败: {str(e)}") + return + + try: + end_time = global_config['end_time'] + signature = BinanceAPI(APPID, SECRET) + etadata = EtaReader(signature=signature, + classifylisturl=global_config['classifylisturl'], + classifyidlisturl=global_config['classifyidlisturl'], + edbcodedataurl=global_config['edbcodedataurl'], + edbcodelist=global_config['edbcodelist'], + edbdatapushurl=global_config['edbdatapushurl'], + edbdeleteurl=global_config['edbdeleteurl'], + edbbusinessurl=global_config['edbbusinessurl'], + classifyId=global_config['ClassifyId'], + ) + # 获取数据 + if is_eta: + logger.info('从eta获取数据...') - if is_market: - logger.info('从市场信息平台获取数据...') - # try: - # 如果是测试环境,最高价最低价取excel文档 - if server_host == '192.168.100.53': - logger.info('从excel文档获取市场信息平台指标') - df_zhibiaoshuju = get_shujuxiang_data(df_zhibiaoshuju) - else: - logger.info('从市场信息平台获取数据') - df_zhibiaoshuju = get_market_data( - end_time, df_zhibiaoshuju) - # except: - # logger.info('市场信息平台数据项-eta数据项 拼接失败') + df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_pp_data( + data_set=data_set, dataset=dataset) # 原始数据,未处理 - # 保存到xlsx文件的sheet表 - with pd.ExcelWriter(os.path.join(dataset, data_set)) as file: - df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False) - df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False) + if is_market: + logger.info('从市场信息平台获取数据...') + # try: + # 如果是测试环境,最高价最低价取excel文档 + if server_host == '192.168.100.53': + logger.info('从excel文档获取市场信息平台指标') + df_zhibiaoshuju = get_shujuxiang_data(df_zhibiaoshuju) + else: + logger.info('从市场信息平台获取数据') + df_zhibiaoshuju = get_market_data( + end_time, df_zhibiaoshuju) + # except: + # logger.info('市场信息平台数据项-eta数据项 拼接失败') - # 数据处理 - df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, - end_time=end_time) + # 保存到xlsx文件的sheet表 + with pd.ExcelWriter(os.path.join(dataset, data_set)) as file: + df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False) + df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False) - else: - # 读取数据 - logger.info('读取本地数据:' + os.path.join(dataset, data_set)) - df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, - is_timefurture=is_timefurture, end_time=end_time) # 原始数据,未处理 + # 数据处理 + df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, + end_time=end_time) - # 更改预测列名称 - df.rename(columns={y: 'y'}, inplace=True) + else: + # 读取数据 + logger.info('读取本地数据:' + os.path.join(dataset, data_set)) + df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, + is_timefurture=is_timefurture, end_time=end_time) # 原始数据,未处理 - if is_edbnamelist: - df = df[edbnamelist] - df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False) - # 保存最新日期的y值到数据库 - # 取第一行数据存储到数据库中 - first_row = df[['ds', 'y']].tail(1) - # 判断y的类型是否为float - if not isinstance(first_row['y'].values[0], float): - logger.info(f'{end_time}预测目标数据为空,跳过') - return None + # 更改预测列名称 + df.rename(columns={y: 'y'}, inplace=True) - # 将最新真实值保存到数据库 - if not sqlitedb.check_table_exists('trueandpredict'): - first_row.to_sql('trueandpredict', 
sqlitedb.connection, index=False) - else: - for row in first_row.itertuples(index=False): - row_dict = row._asdict() - config.logger.info(f'要保存的真实值:{row_dict}') - # 判断ds是否为字符串类型,如果不是则转换为字符串类型 - if isinstance(row_dict['ds'], (pd.Timestamp, datetime.datetime)): - row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') - elif not isinstance(row_dict['ds'], str): - try: - row_dict['ds'] = pd.to_datetime( - row_dict['ds']).strftime('%Y-%m-%d') - except: - logger.warning(f"无法解析的时间格式: {row_dict['ds']}") - # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') - # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S') - check_query = sqlitedb.select_data( - 'trueandpredict', where_condition=f"ds = '{row.ds}'") - if len(check_query) > 0: - set_clause = ", ".join( - [f"{key} = '{value}'" for key, value in row_dict.items()]) - sqlitedb.update_data( - 'trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'") - continue - sqlitedb.insert_data('trueandpredict', tuple( - row_dict.values()), columns=row_dict.keys()) + if is_edbnamelist: + df = df[edbnamelist] + df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False) + # 保存最新日期的y值到数据库 + # 取第一行数据存储到数据库中 + first_row = df[['ds', 'y']].tail(1) + # 判断y的类型是否为float + if not isinstance(first_row['y'].values[0], float): + logger.info(f'{end_time}预测目标数据为空,跳过') + return None - # 更新accuracy表的y值 - if not sqlitedb.check_table_exists('accuracy'): - pass - else: - update_y = sqlitedb.select_data( - 'accuracy', where_condition="y is null") - if len(update_y) > 0: - logger.info('更新accuracy表的y值') - # 找到update_y 中ds且df中的y的行 - update_y = update_y[update_y['ds'] <= end_time] - logger.info(f'要更新y的信息:{update_y}') - # try: - for row in update_y.itertuples(index=False): - try: - row_dict = row._asdict() - yy = df[df['ds'] == row_dict['ds']]['y'].values[0] - LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0] - HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0] + # 将最新真实值保存到数据库 + if not sqlitedb.check_table_exists('trueandpredict'): + first_row.to_sql('trueandpredict', sqlitedb.connection, index=False) + else: + for row in first_row.itertuples(index=False): + row_dict = row._asdict() + config.logger.info(f'要保存的真实值:{row_dict}') + # 判断ds是否为字符串类型,如果不是则转换为字符串类型 + if isinstance(row_dict['ds'], (pd.Timestamp, datetime.datetime)): + row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') + elif not isinstance(row_dict['ds'], str): + try: + row_dict['ds'] = pd.to_datetime( + row_dict['ds']).strftime('%Y-%m-%d') + except: + logger.warning(f"无法解析的时间格式: {row_dict['ds']}") + # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') + # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S') + check_query = sqlitedb.select_data( + 'trueandpredict', where_condition=f"ds = '{row.ds}'") + if len(check_query) > 0: + set_clause = ", ".join( + [f"{key} = '{value}'" for key, value in row_dict.items()]) sqlitedb.update_data( - 'accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'") - except: - logger.info(f'更新accuracy表的y值失败:{row_dict}') - # except Exception as e: - # logger.info(f'更新accuracy表的y值失败:{e}') + 'trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'") + continue + sqlitedb.insert_data('trueandpredict', tuple( + row_dict.values()), columns=row_dict.keys()) - # 判断当前日期是不是周一 - is_weekday = datetime.datetime.now().weekday() == 0 - if is_weekday: - logger.info('今天是周一,更新预测模型') - # 计算最近60天预测残差最低的模型名称 - model_results = sqlitedb.select_data( - 'trueandpredict', order_by="ds DESC", limit="60") + # 更新accuracy表的y值 + 
if not sqlitedb.check_table_exists('accuracy'): + pass + else: + update_y = sqlitedb.select_data( + 'accuracy', where_condition="y is null") + if len(update_y) > 0: + logger.info('更新accuracy表的y值') + # 找到update_y 中ds且df中的y的行 + update_y = update_y[update_y['ds'] <= end_time] + logger.info(f'要更新y的信息:{update_y}') + # try: + for row in update_y.itertuples(index=False): + try: + row_dict = row._asdict() + yy = df[df['ds'] == row_dict['ds']]['y'].values[0] + LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0] + HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0] + sqlitedb.update_data( + 'accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'") + except Exception as e: + logger.info(f'更新accuracy表的y值失败:{row}') + # except Exception as e: + # logger.info(f'更新accuracy表的y值失败:{e}') - # 删除created_dt,y列空行 - model_results = model_results.dropna( - subset=['created_dt', 'y']) - # 删除空值率为90%以上的列 - model_results = model_results.dropna( - thresh=len(model_results)*0.1, axis=1) - modelnames = model_results.columns.to_list()[2:-1] - for col in model_results[modelnames].select_dtypes(include=['object']).columns: - model_results[col] = model_results[col].astype(np.float32) - # 计算每个预测值与真实值之间的偏差率 - for model in modelnames: - model_results[f'{model}_abs_error_rate'] = abs( - model_results['y'] - model_results[model]) / model_results['y'] - # 获取每行对应的最小偏差率值 - min_abs_error_rate_values = model_results.apply( - lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1) - # 获取每行对应的最小偏差率值对应的列名 - min_abs_error_rate_column_name = model_results.apply( - lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) - # 将列名索引转换为列名 - min_abs_error_rate_column_name = min_abs_error_rate_column_name.map( - lambda x: x.split('_')[0]) - # 取出现次数最多的模型名称 - most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() - logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}") - # 保存结果到数据库 - if not sqlitedb.check_table_exists('most_model'): - sqlitedb.create_table( - 'most_model', columns="ds datetime, most_common_model TEXT") - sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime( - '%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',)) + # 判断当前日期是不是周一 + is_weekday = datetime.datetime.now().weekday() == 0 + # is_weekday = datetime.datetime.now().weekday() == 4 + try: + if is_weekday: + logger.info('今天是周一,更新预测模型') + # 计算最近60天预测残差最低的模型名称 + model_results = sqlitedb.select_data( + 'trueandpredict', order_by="ds DESC", limit="60") - try: - if True: - logger.info('发送特征预警') - # 获取取消订阅的指标ID - quxiaodingyueidlist = get_waring_data() - # 上传预警信息到数据库 - warning_data_df = df_zhibiaoliebiao.copy() - warning_data_df = warning_data_df[warning_data_df['停更周期'] > 3][[ - '指标名称', '指标id', '频度', '更新周期', '指标来源', '最后更新时间', '停更周期']] - # 重命名列名 - warning_data_df = warning_data_df.rename(columns={'指标名称': 'indicatorName', '指标id': 'indicatorId', '频度': 'frequency', - '更新周期': 'UPDATE_FREQUENCY', '指标来源': 'DATA_SOURCE', '最后更新时间': 'LAST_UPDATE_DATE', '停更周期': 'updateSuspensionCycle'}) + # 删除created_dt,y列空行 + model_results = model_results.dropna( + subset=['created_dt', 'y']) + # 删除空值率为90%以上的列 + model_results = model_results.dropna( + thresh=len(model_results)*0.1, axis=1) + modelnames = model_results.columns.to_list()[2:-1] + for col in model_results[modelnames].select_dtypes(include=['object']).columns: + model_results[col] = model_results[col].astype(np.float32) + # 计算每个预测值与真实值之间的偏差率 + for model in modelnames: 
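# NOTE: the accuracy backfill above fetches y / Brentzdj / Brentzgj row by row
# with .values[0], so a ds missing from df raises IndexError, which lands in
# the broad except and only gets logged. The same join in one step (column
# names from the diff; assumes update_y carries the null y / LOW_PRICE /
# HIGH_PRICE columns it was selected from):
import pandas as pd

def backfill_accuracy(update_y: pd.DataFrame, df: pd.DataFrame) -> pd.DataFrame:
    prices = df[["ds", "y", "Brentzdj", "Brentzgj"]]
    merged = (update_y.drop(columns=["y", "LOW_PRICE", "HIGH_PRICE"], errors="ignore")
              .merge(prices, on="ds", how="inner"))
    return merged.rename(columns={"Brentzdj": "LOW_PRICE", "Brentzgj": "HIGH_PRICE"})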
+ model_results[f'{model}_abs_error_rate'] = abs( + model_results['y'] - model_results[model]) / model_results['y'] + # 获取每行对应的最小偏差率值 + min_abs_error_rate_values = model_results.apply( + lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1) + # 获取每行对应的最小偏差率值对应的列名 + min_abs_error_rate_column_name = model_results.apply( + lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) + # 将列名索引转换为列名 + min_abs_error_rate_column_name = min_abs_error_rate_column_name.map( + lambda x: x.split('_')[0]) + # 取出现次数最多的模型名称 + most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() + logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}") + # 保存结果到数据库 + if not sqlitedb.check_table_exists('most_model'): + sqlitedb.create_table( + 'most_model', columns="ds datetime, most_common_model TEXT") + sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime( + '%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',)) + except: + logger.info(f"周一最佳模型更新失败,影响pdf报告") - warning_data_df['warningDate'] = datetime.date.today().strftime( - "%Y-%m-%d %H:%M:%S") - warning_data_df['dataSource'] = 9 + try: + if True: + logger.info('发送特征预警') + # 获取取消订阅的指标ID + quxiaodingyueidlist = get_waring_data() + # 上传预警信息到数据库 + warning_data_df = df_zhibiaoliebiao.copy() + warning_data_df = warning_data_df[warning_data_df['停更周期'] > 3][[ + '指标名称', '指标id', '频度', '更新周期', '指标来源', '最后更新时间', '停更周期']] + # 重命名列名 + warning_data_df = warning_data_df.rename(columns={'指标名称': 'indicatorName', '指标id': 'indicatorId', '频度': 'frequency', + '更新周期': 'UPDATE_FREQUENCY', '指标来源': 'DATA_SOURCE', '最后更新时间': 'LAST_UPDATE_DATE', '停更周期': 'updateSuspensionCycle'}) - if len(quxiaodingyueidlist) > 0: - # 去掉取消订阅的指标 - print(warning_data_df.shape) - warning_data_df = warning_data_df[~warning_data_df['indicatorId'].isin( - quxiaodingyueidlist)] - print(warning_data_df.shape) - warning_data = warning_data_df.to_json( - orient='records', force_ascii=False) - warning_data = warning_data.replace('日度', '1') - warning_data = warning_data.replace('周度', '2') - warning_data = warning_data.replace('月度', '3') - warning_data = json.loads(warning_data) - push_waring_market_data( - warning_data, dataSource=warning_data_df['dataSource'].values[0]) - # if is_update_warning_data: - # upload_warning_info(len(warning_data_df)) - except: - logger.info('上传预警信息到数据库失败') + warning_data_df['warningDate'] = datetime.date.today().strftime( + "%Y-%m-%d %H:%M:%S") + warning_data_df['dataSource'] = 9 - if is_corr: - df = corr_feature(df=df) + if len(quxiaodingyueidlist) > 0: + # 去掉取消订阅的指标 + print(warning_data_df.shape) + warning_data_df = warning_data_df[~warning_data_df['indicatorId'].isin( + quxiaodingyueidlist)] + print(warning_data_df.shape) + warning_data = warning_data_df.to_json( + orient='records', force_ascii=False) + warning_data = warning_data.replace('日度', '1') + warning_data = warning_data.replace('周度', '2') + warning_data = warning_data.replace('月度', '3') + warning_data = json.loads(warning_data) + push_waring_market_data( + warning_data, dataSource=warning_data_df['dataSource'].values[0]) + # if is_update_warning_data: + # upload_warning_info(len(warning_data_df)) + except: + logger.info('上传预警信息到数据库失败') - df1 = df.copy() # 备份一下,后面特征筛选完之后加入ds y 列用 - logger.info(f"开始训练模型...") - row, col = df.shape + if is_corr: + df = corr_feature(df=df) - now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') - ex_Model_Juxiting(df, - horizon=global_config['horizon'], - 
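# NOTE: the Monday branch above crowns the model whose absolute error rate
# |y - yhat| / y wins the most rows over the last 60 records. The per-model
# loop plus the two apply() passes collapse to a few vectorised calls; sketch
# only, with the same caveat as the original that y == 0 yields inf:
import pandas as pd

def most_common_best_model(results: pd.DataFrame, modelnames: list) -> str:
    err = (results[modelnames].sub(results["y"], axis=0).abs()
           .div(results["y"], axis=0))
    return err.idxmin(axis=1).value_counts().idxmax()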
input_size=global_config['input_size'], - train_steps=global_config['train_steps'], - val_check_steps=global_config['val_check_steps'], - early_stop_patience_steps=global_config['early_stop_patience_steps'], - is_debug=global_config['is_debug'], - dataset=global_config['dataset'], - is_train=global_config['is_train'], - is_fivemodels=global_config['is_fivemodels'], - val_size=global_config['val_size'], - test_size=global_config['test_size'], - settings=global_config['settings'], - now=now, - etadata=etadata, - modelsindex=global_config['modelsindex'], - data=data, - is_eta=global_config['is_eta'], - end_time=global_config['end_time'], - ) + df1 = df.copy() # 备份一下,后面特征筛选完之后加入ds y 列用 + logger.info(f"开始训练模型...") + row, col = df.shape - logger.info('模型训练完成') + now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') + ex_Model_Juxiting(df, + horizon=global_config['horizon'], + input_size=global_config['input_size'], + train_steps=global_config['train_steps'], + val_check_steps=global_config['val_check_steps'], + early_stop_patience_steps=global_config['early_stop_patience_steps'], + is_debug=global_config['is_debug'], + dataset=global_config['dataset'], + is_train=global_config['is_train'], + is_fivemodels=global_config['is_fivemodels'], + val_size=global_config['val_size'], + test_size=global_config['test_size'], + settings=global_config['settings'], + now=now, + etadata=etadata, + modelsindex=global_config['modelsindex'], + data=data, + is_eta=global_config['is_eta'], + end_time=global_config['end_time'], + ) - # logger.info('训练数据绘图ing') - # model_results3 = model_losss_juxiting( - # sqlitedb, end_time=global_config['end_time'], is_fivemodels=global_config['is_fivemodels']) - # logger.info('训练数据绘图end') + logger.info('模型训练完成') - # # # 模型报告 - # logger.info('制作报告ing') - # title = f'{settings}--{end_time}-预测报告' # 报告标题 - # reportname = f'聚烯烃PP大模型日度预测--{end_time}.pdf' # 报告文件名 - # reportname = reportname.replace(':', '-') # 替换冒号 - # pp_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time, - # reportname=reportname, sqlitedb=sqlitedb), + # logger.info('训练数据绘图ing') + # model_results3 = model_losss_juxiting( + # sqlitedb, end_time=global_config['end_time'], is_fivemodels=global_config['is_fivemodels']) + # logger.info('训练数据绘图end') - # logger.info('制作报告end') + # # # 模型报告 + # logger.info('制作报告ing') + # title = f'{settings}--{end_time}-预测报告' # 报告标题 + # reportname = f'聚烯烃PP大模型日度预测--{end_time}.pdf' # 报告文件名 + # reportname = reportname.replace(':', '-') # 替换冒号 + # pp_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time, + # reportname=reportname, sqlitedb=sqlitedb), - try: - push_market_value() - logger.info('推送市场值完成') + # logger.info('制作报告end') + + try: + push_market_value() + logger.info('推送市场值完成') + except Exception as e: + logger.info(f'推送市场值失败:{e}') + + try: + sql_inset_predict(global_config) + logger.info('插入预测数据完成') + except Exception as e: + logger.info(f'插入预测数据失败:{e}') + + # # LSTM 单变量模型 + # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset) + + # # lstm 多变量模型 + # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset) + + # # GRU 模型 + # # ex_GRU(df) + + # 发送邮件 + # m = SendMail( + # username=username, + # passwd=passwd, + # recv=recv, + # title=title, + # content=content, + # file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime), + # ssl=ssl, + # ) + # m.send_mail() + except Exception as e: - logger.info(f'推送市场值失败:{e}') - - try: - 
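# NOTE: every ex_Model_Juxiting argument above is hand-copied out of
# global_config. A sketch that forwards whatever config keys match the
# function's signature, so the call stays in sync when parameters are added;
# inspect is stdlib, nothing here is part of the project API:
import inspect

def call_with_config(fn, config: dict, **overrides):
    accepted = inspect.signature(fn).parameters
    kwargs = {k: v for k, v in config.items() if k in accepted}
    kwargs.update(overrides)
    return fn(**kwargs)

# e.g. call_with_config(ex_Model_Juxiting, global_config,
#                       df=df, now=now, etadata=etadata, data=data)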
sql_inset_predict(global_config) - logger.info('插入预测数据完成') - except Exception as e: - logger.info(f'插入预测数据失败:{e}') - - # # LSTM 单变量模型 - # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset) - - # # lstm 多变量模型 - # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset) - - # # GRU 模型 - # # ex_GRU(df) - - # 发送邮件 - # m = SendMail( - # username=username, - # passwd=passwd, - # recv=recv, - # title=title, - # content=content, - # file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime), - # ssl=ssl, - # ) - # m.send_mail() + global_config['logger'].error(f"预测过程中发生错误: {str(e)}") + raise + finally: + # 确保数据库连接被关闭 + try: + if global_config['db_mysql'].is_connected(): + global_config['db_mysql'].close() + global_config['logger'].info("数据库连接已关闭") + except Exception as e: + global_config['logger'].error(f"关闭数据库连接时发生错误: {str(e)}") if __name__ == '__main__': # global end_time # 遍历2024-11-25 到 2024-12-3 之间的工作日日期 - # for i_time in pd.date_range('2025-8-1', '2025-8-11', freq='B'): + # for i_time in pd.date_range('2025-8-18', '2025-9-1', freq='B'): # try: # global_config['end_time'] = i_time.strftime('%Y-%m-%d') # global_config['db_mysql'].connect() @@ -568,7 +598,25 @@ if __name__ == '__main__': # logger.info(f'预测失败:{e}') # continue - # global_config['end_time'] = '2025-08-14' + parser = argparse.ArgumentParser() + parser.add_argument('--end_time', type=str, default='2025-10-01') + args = parser.parse_args() + global_config['end_time'] = args.end_time + + # 交易日检查 + # 将 end_time 转换为 YYYYMMDD 格式 + end_time_str = global_config['end_time'].replace('-', '') + logger.info(f"开始检查预测日期 {global_config['end_time']} 是否为交易日") + + # 使用 global_config 中的交易日检查参数 + is_trading = check_trading_day(end_time_str, global_config) + if not is_trading: + logger.info(f"预测日期 {global_config['end_time']} 不是交易日,跳过预测任务") + exit(0) + else: + logger.info(f"预测日期 {global_config['end_time']} 是交易日,开始执行预测任务") + + predict_main() # global_config['end_time'] = '2025-08-14' diff --git a/main_juxiting_yuedu.py b/main_juxiting_yuedu.py index f25e317..6395027 100644 --- a/main_juxiting_yuedu.py +++ b/main_juxiting_yuedu.py @@ -1,13 +1,12 @@ -# 读取配置 - +import argparse from lib.dataread import * from config_juxiting_yuedu import * from lib.tools import SendMail, convert_df_to_pydantic_pp, exception_logger, find_best_models, get_modelsname, merge_images from models.nerulforcastmodels import ex_Model_Juxiting, model_losss_juxiting, pp_bdwd_png, pp_export_pdf import datetime -import torch -torch.set_float32_matmul_precision("high") -torch.set_num_threads(4) +# import torch +# torch.set_float32_matmul_precision("high") +# torch.set_num_threads(4) global_config.update({ # 核心参数 @@ -61,6 +60,10 @@ global_config.update({ 'upload_data': upload_data, 'upload_warning_url': upload_warning_url, 'warning_data': warning_data, + 'tqbz_login_url': tqbz_login_url, + 'tqbz_login_data': tqbz_login_data, + 'is_trading_day_data': is_trading_day_data, + 'query_is_trading_day_url': query_is_trading_day_url, # 查询接口 'query_data_list_item_nos_url': query_data_list_item_nos_url, @@ -81,7 +84,6 @@ global_config.update({ 'edbdatapushurl': edbdatapushurl, 'edbdeleteurl': edbdeleteurl, 'edbbusinessurl': edbbusinessurl, - 'ClassifyId': ClassifyId, 'classifylisturl': classifylisturl, # 数据库配置 @@ -251,13 +253,11 @@ def sql_inset_predict(global_config): affected_rows = config.db_mysql.execute_batch_insert( insert_query, params_list) config.logger.info(f"成功插入或更新 {affected_rows} 条记录") - 
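# NOTE: the reworked __main__ above replaces the hard-coded end_time with an
# argparse flag and gates the run on a trading-day lookup. The same gate in
# isolation (check_trading_day and global_config as in the diff); sys.exit is
# used instead of the interactive exit() helper, which the site module may
# not install in every environment:
import argparse
import sys

def cli_end_time(default: str) -> str:
    parser = argparse.ArgumentParser()
    parser.add_argument("--end_time", type=str, default=default)
    return parser.parse_args().end_time

# end_time = cli_end_time("2025-10-01")
# if not check_trading_day(end_time.replace("-", ""), global_config):
#     sys.exit(0)  # not a trading day: skip this run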
config.db_mysql.close() def predict_main(): """ 主预测函数,用于从 ETA 获取数据、处理数据、训练模型并进行预测。 - 参数: signature (BinanceAPI): Binance API 实例。 etadata (EtaReader): ETA 数据读取器实例。 @@ -293,252 +293,282 @@ def predict_main(): 返回: None """ - end_time = global_config['end_time'] - signature = BinanceAPI(APPID, SECRET) - etadata = EtaReader(signature=signature, - classifylisturl=global_config['classifylisturl'], - classifyidlisturl=global_config['classifyidlisturl'], - edbcodedataurl=global_config['edbcodedataurl'], - edbcodelist=global_config['edbcodelist'], - edbdatapushurl=global_config['edbdatapushurl'], - edbdeleteurl=global_config['edbdeleteurl'], - edbbusinessurl=global_config['edbbusinessurl'], - classifyId=global_config['ClassifyId'], - ) - # 获取数据 - if is_eta: - logger.info('从eta获取数据...') - df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_pp_data( - data_set=data_set, dataset=dataset) # 原始数据,未处理 + # 确保数据库连接 + try: + if not global_config['db_mysql'].is_connected(): + global_config['logger'].info("数据库连接已断开,正在重新连接...") + global_config['db_mysql'].connect() + global_config['logger'].info("数据库连接正常") + except Exception as e: + global_config['logger'].error(f"数据库连接失败: {str(e)}") + return + + try: - if is_market: - logger.info('从市场信息平台获取数据...') - try: - # 如果是测试环境,最高价最低价取excel文档 - if server_host == '192.168.100.53': - logger.info('从excel文档获取最高价最低价') - df_zhibiaoshuju = get_high_low_data(df_zhibiaoshuju) - else: - logger.info('从市场信息平台获取数据') - df_zhibiaoshuju = get_market_data( - end_time, df_zhibiaoshuju) + end_time = global_config['end_time'] + signature = BinanceAPI(APPID, SECRET) + etadata = EtaReader(signature=signature, + classifylisturl=global_config['classifylisturl'], + classifyidlisturl=global_config['classifyidlisturl'], + edbcodedataurl=global_config['edbcodedataurl'], + edbcodelist=global_config['edbcodelist'], + edbdatapushurl=global_config['edbdatapushurl'], + edbdeleteurl=global_config['edbdeleteurl'], + edbbusinessurl=global_config['edbbusinessurl'], + classifyId=global_config['ClassifyId'], + ) + # 获取数据 + if is_eta: + logger.info('从eta获取数据...') - except: - logger.info('最高最低价拼接失败') + df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_pp_data( + data_set=data_set, dataset=dataset) # 原始数据,未处理 - # 保存到xlsx文件的sheet表 - with pd.ExcelWriter(os.path.join(dataset, data_set)) as file: - df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False) - df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False) - - # 数据处理 - df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, - end_time=end_time) - - else: - # 读取数据 - logger.info('读取本地数据:' + os.path.join(dataset, data_set)) - df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, - is_timefurture=is_timefurture, end_time=end_time) # 原始数据,未处理 - - # 更改预测列名称 - df.rename(columns={y: 'y'}, inplace=True) - - if is_edbnamelist: - df = df[edbnamelist] - df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False) - # 保存最新日期的y值到数据库 - # 取第一行数据存储到数据库中 - first_row = df[['ds', 'y']].tail(1) - # 判断y的类型是否为float - if not isinstance(first_row['y'].values[0], float): - logger.info(f'{end_time}预测目标数据为空,跳过') - return None - - # 将最新真实值保存到数据库 - if not sqlitedb.check_table_exists('trueandpredict'): - first_row.to_sql('trueandpredict', sqlitedb.connection, index=False) - else: - for row in first_row.itertuples(index=False): - row_dict = row._asdict() - config.logger.info(f'要保存的真实值:{row_dict}') - # 
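# NOTE: the branch above decides between the Excel fixture and the market API
# by comparing server_host against a hard-coded address. A sketch that names
# the intent; get_high_low_data / get_market_data come from the diff, the
# TEST_HOSTS set is otherwise illustrative:
TEST_HOSTS = {"192.168.100.53"}

def fetch_high_low(df, server_host: str, end_time: str):
    """Excel fixture on test hosts, market API everywhere else."""
    if server_host in TEST_HOSTS:
        return get_high_low_data(df)
    return get_market_data(end_time, df)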
判断ds是否为字符串类型,如果不是则转换为字符串类型 - if isinstance(row_dict['ds'], (pd.Timestamp, datetime.datetime)): - row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') - elif not isinstance(row_dict['ds'], str): + if is_market: + logger.info('从市场信息平台获取数据...') try: - row_dict['ds'] = pd.to_datetime( - row_dict['ds']).strftime('%Y-%m-%d') + # 如果是测试环境,最高价最低价取excel文档 + if server_host == '192.168.100.53': + logger.info('从excel文档获取最高价最低价') + df_zhibiaoshuju = get_high_low_data(df_zhibiaoshuju) + else: + logger.info('从市场信息平台获取数据') + df_zhibiaoshuju = get_market_data( + end_time, df_zhibiaoshuju) + except: - logger.warning(f"无法解析的时间格式: {row_dict['ds']}") - # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') - # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S') - check_query = sqlitedb.select_data( - 'trueandpredict', where_condition=f"ds = '{row.ds}'") - if len(check_query) > 0: - set_clause = ", ".join( - [f"{key} = '{value}'" for key, value in row_dict.items()]) - sqlitedb.update_data( - 'trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'") - continue - sqlitedb.insert_data('trueandpredict', tuple( - row_dict.values()), columns=row_dict.keys()) + logger.info('最高最低价拼接失败') - # 更新accuracy表的y值 - if not sqlitedb.check_table_exists('accuracy'): - pass - else: - update_y = sqlitedb.select_data( - 'accuracy', where_condition="y is null") - if len(update_y) > 0: - logger.info('更新accuracy表的y值') - # 找到update_y 中ds且df中的y的行 - update_y = update_y[update_y['ds'] <= end_time] - logger.info(f'要更新y的信息:{update_y}') - # try: - for row in update_y.itertuples(index=False): - try: - row_dict = row._asdict() - yy = df[df['ds'] == row_dict['ds']]['y'].values[0] - LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0] - HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0] + # 保存到xlsx文件的sheet表 + with pd.ExcelWriter(os.path.join(dataset, data_set)) as file: + df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False) + df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False) + + # 数据处理 + df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, + end_time=end_time) + + else: + # 读取数据 + logger.info('读取本地数据:' + os.path.join(dataset, data_set)) + df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, + is_timefurture=is_timefurture, end_time=end_time) # 原始数据,未处理 + + # 更改预测列名称 + df.rename(columns={y: 'y'}, inplace=True) + + if is_edbnamelist: + df = df[edbnamelist] + df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False) + # 保存最新日期的y值到数据库 + # 取第一行数据存储到数据库中 + first_row = df[['ds', 'y']].tail(1) + # 判断y的类型是否为float + if not isinstance(first_row['y'].values[0], float): + logger.info(f'{end_time}预测目标数据为空,跳过') + return None + + # 将最新真实值保存到数据库 + if not sqlitedb.check_table_exists('trueandpredict'): + first_row.to_sql('trueandpredict', sqlitedb.connection, index=False) + else: + for row in first_row.itertuples(index=False): + row_dict = row._asdict() + config.logger.info(f'要保存的真实值:{row_dict}') + # 判断ds是否为字符串类型,如果不是则转换为字符串类型 + if isinstance(row_dict['ds'], (pd.Timestamp, datetime.datetime)): + row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') + elif not isinstance(row_dict['ds'], str): + try: + row_dict['ds'] = pd.to_datetime( + row_dict['ds']).strftime('%Y-%m-%d') + except: + logger.warning(f"无法解析的时间格式: {row_dict['ds']}") + # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') + # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S') + 
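# NOTE: the three-way ds normalisation above (Timestamp branch, str branch,
# fallback parse) reduces to a single pd.to_datetime call, which accepts all
# of those inputs; errors="coerce" keeps the unparseable case explicit instead
# of logging a warning and carrying the raw value forward:
import pandas as pd

def to_day_string(value):
    ts = pd.to_datetime(value, errors="coerce")
    return None if pd.isna(ts) else ts.strftime("%Y-%m-%d")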
check_query = sqlitedb.select_data( + 'trueandpredict', where_condition=f"ds = '{row.ds}'") + if len(check_query) > 0: + set_clause = ", ".join( + [f"{key} = '{value}'" for key, value in row_dict.items()]) sqlitedb.update_data( - 'accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'") - except: - logger.info(f'更新accuracy表的y值失败:{row_dict}') - # except Exception as e: - # logger.info(f'更新accuracy表的y值失败:{e}') + 'trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'") + continue + sqlitedb.insert_data('trueandpredict', tuple( + row_dict.values()), columns=row_dict.keys()) - # 判断当前日期是不是周一 - is_weekday = datetime.datetime.now().weekday() == 0 - if is_weekday: - logger.info('今天是周一,更新预测模型') - # 计算最近60天预测残差最低的模型名称 - model_results = sqlitedb.select_data( - 'trueandpredict', order_by="ds DESC", limit="60") - # 删除空值率为90%以上的列 - if len(model_results) > 10: - model_results = model_results.dropna( - thresh=len(model_results)*0.1, axis=1) - # 删除空行 - model_results = model_results.dropna() - modelnames = model_results.columns.to_list()[2:-1] - for col in model_results[modelnames].select_dtypes(include=['object']).columns: - model_results[col] = model_results[col].astype(np.float32) - # 计算每个预测值与真实值之间的偏差率 - for model in modelnames: - model_results[f'{model}_abs_error_rate'] = abs( - model_results['y'] - model_results[model]) / model_results['y'] - # 获取每行对应的最小偏差率值 - min_abs_error_rate_values = model_results.apply( - lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1) - # 获取每行对应的最小偏差率值对应的列名 - min_abs_error_rate_column_name = model_results.apply( - lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) - # 将列名索引转换为列名 - min_abs_error_rate_column_name = min_abs_error_rate_column_name.map( - lambda x: x.split('_')[0]) - # 取出现次数最多的模型名称 - most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() - logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}") - # 保存结果到数据库 - if not sqlitedb.check_table_exists('most_model'): - sqlitedb.create_table( - 'most_model', columns="ds datetime, most_common_model TEXT") - sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime( - '%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',)) + # 更新accuracy表的y值 + if not sqlitedb.check_table_exists('accuracy'): + pass + else: + update_y = sqlitedb.select_data( + 'accuracy', where_condition="y is null") + if len(update_y) > 0: + logger.info('更新accuracy表的y值') + # 找到update_y 中ds且df中的y的行 + update_y = update_y[update_y['ds'] <= end_time] + logger.info(f'要更新y的信息:{update_y}') + # try: + for row in update_y.itertuples(index=False): + try: + row_dict = row._asdict() + yy = df[df['ds'] == row_dict['ds']]['y'].values[0] + LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0] + HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0] + sqlitedb.update_data( + 'accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'") + except: + # logger.info(f'更新accuracy表的y值失败:{row_dict}') + pass + # except Exception as e: + # logger.info(f'更新accuracy表的y值失败:{e}') - if is_corr: - df = corr_feature(df=df) + # 判断当前日期是不是周一 + is_weekday = datetime.datetime.now().weekday() == 0 + # is_weekday = datetime.datetime.now().weekday() == 4 + try: + if is_weekday: + logger.info('今天是周一,更新预测模型') + # 计算最近60天预测残差最低的模型名称 + model_results = sqlitedb.select_data( + 'trueandpredict', order_by="ds DESC", limit="60") + # 删除空值率为90%以上的列 + if len(model_results) > 10: + 
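# NOTE: in this copy the per-row failure log is commented out and the bare
# except simply passes, so rows whose ds is absent from df disappear without a
# trace. If the log line was dropped for being noisy, demoting it to DEBUG
# keeps the evidence; sketch, logging is stdlib:
import logging

step_logger = logging.getLogger(__name__)

def quiet_step(fn, *args, **kwargs) -> bool:
    """Run one backfill step; log failures at DEBUG instead of swallowing them."""
    try:
        fn(*args, **kwargs)
        return True
    except Exception:
        step_logger.debug("accuracy backfill skipped a row", exc_info=True)
        return False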
model_results = model_results.dropna( + thresh=len(model_results)*0.1, axis=1) + # 删除空行 + model_results = model_results.dropna() + modelnames = model_results.columns.to_list()[2:-1] + for col in model_results[modelnames].select_dtypes(include=['object']).columns: + model_results[col] = model_results[col].astype(np.float32) + # 计算每个预测值与真实值之间的偏差率 + for model in modelnames: + model_results[f'{model}_abs_error_rate'] = abs( + model_results['y'] - model_results[model]) / model_results['y'] + # 获取每行对应的最小偏差率值 + min_abs_error_rate_values = model_results.apply( + lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1) + # 获取每行对应的最小偏差率值对应的列名 + min_abs_error_rate_column_name = model_results.apply( + lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) + # 将列名索引转换为列名 + min_abs_error_rate_column_name = min_abs_error_rate_column_name.map( + lambda x: x.split('_')[0]) + # 取出现次数最多的模型名称 + most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() + logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}") + # 保存结果到数据库 + if not sqlitedb.check_table_exists('most_model'): + sqlitedb.create_table( + 'most_model', columns="ds datetime, most_common_model TEXT") + sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime( + '%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',)) + except : + logger.info(f"周一最佳模型更新失败,影响pdf报告") - df1 = df.copy() # 备份一下,后面特征筛选完之后加入ds y 列用 - logger.info(f"开始训练模型...") - row, col = df.shape + if is_corr: + df = corr_feature(df=df) - now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') - ex_Model_Juxiting(df, - horizon=global_config['horizon'], - input_size=global_config['input_size'], - train_steps=global_config['train_steps'], - val_check_steps=global_config['val_check_steps'], - early_stop_patience_steps=global_config['early_stop_patience_steps'], - is_debug=global_config['is_debug'], - dataset=global_config['dataset'], - is_train=global_config['is_train'], - is_fivemodels=global_config['is_fivemodels'], - val_size=global_config['val_size'], - test_size=global_config['test_size'], - settings=global_config['settings'], - now=now, - etadata=etadata, - modelsindex=global_config['modelsindex'], - data=data, - is_eta=global_config['is_eta'], - end_time=global_config['end_time'], - ) + df1 = df.copy() # 备份一下,后面特征筛选完之后加入ds y 列用 + logger.info(f"开始训练模型...") + row, col = df.shape - logger.info('模型训练完成') + now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') + ex_Model_Juxiting(df, + horizon=global_config['horizon'], + input_size=global_config['input_size'], + train_steps=global_config['train_steps'], + val_check_steps=global_config['val_check_steps'], + early_stop_patience_steps=global_config['early_stop_patience_steps'], + is_debug=global_config['is_debug'], + dataset=global_config['dataset'], + is_train=global_config['is_train'], + is_fivemodels=global_config['is_fivemodels'], + val_size=global_config['val_size'], + test_size=global_config['test_size'], + settings=global_config['settings'], + now=now, + etadata=etadata, + modelsindex=global_config['modelsindex'], + data=data, + is_eta=global_config['is_eta'], + end_time=global_config['end_time'], + ) - try: - logger.info('训练数据绘图ing') - model_results3 = model_losss_juxiting( - sqlitedb, end_time=global_config['end_time'], is_fivemodels=global_config['is_fivemodels']) - logger.info('训练数据绘图end') + logger.info('模型训练完成') + + # try: + # logger.info('训练数据绘图ing') + # model_results3 = model_losss_juxiting( + # sqlitedb, 
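# NOTE: modelnames above is a positional slice, columns[2:-1], which silently
# breaks the ranking whenever the table gains or loses a column. Listing the
# non-model columns is sturdier; ds, y and created_dt appear in the diff, but
# the column excluded by the trailing -1 is never named, so this set is
# partly illustrative:
NON_MODEL_COLS = {"ds", "y", "created_dt"}

def model_columns(df) -> list:
    return [c for c in df.columns if c not in NON_MODEL_COLS]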
end_time=global_config['end_time'], is_fivemodels=global_config['is_fivemodels']) + # logger.info('训练数据绘图end') + # except Exception as e: + # logger.info(f'训练数据绘图失败:{e}') + + try: + push_market_value() + logger.info('推送市场值完成') + except Exception as e: + logger.info(f'推送市场值失败:{e}') + + try: + sql_inset_predict(global_config) + logger.info('插入预测数据完成') + except Exception as e: + logger.info(f'插入预测数据失败:{e}') + + + # 模型报告 + # logger.info('制作报告ing') + # title = f'{settings}--{end_time}-预测报告' # 报告标题 + # reportname = f'聚烯烃PP大模型月度预测--{end_time}.pdf' # 报告文件名 + # reportname = reportname.replace(':', '-') # 替换冒号 + # pp_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time, + # reportname=reportname, sqlitedb=sqlitedb), + + # logger.info('制作报告end') + + + + # # LSTM 单变量模型 + # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset) + + # # lstm 多变量模型 + # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset) + + # # GRU 模型 + # # ex_GRU(df) + + # 发送邮件 + # m = SendMail( + # username=username, + # passwd=passwd, + # recv=recv, + # title=title, + # content=content, + # file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime), + # ssl=ssl, + # ) + # m.send_mail() + except Exception as e: - logger.info(f'训练数据绘图失败:{e}') - - try: - push_market_value() - logger.info('推送市场值完成') - except Exception as e: - logger.info(f'推送市场值失败:{e}') - - try: - sql_inset_predict(global_config) - logger.info('插入预测数据完成') - except Exception as e: - logger.info(f'插入预测数据失败:{e}') - - - # 模型报告 - # logger.info('制作报告ing') - # title = f'{settings}--{end_time}-预测报告' # 报告标题 - # reportname = f'聚烯烃PP大模型月度预测--{end_time}.pdf' # 报告文件名 - # reportname = reportname.replace(':', '-') # 替换冒号 - # pp_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time, - # reportname=reportname, sqlitedb=sqlitedb), - - # logger.info('制作报告end') - - - - # # LSTM 单变量模型 - # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset) - - # # lstm 多变量模型 - # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset) - - # # GRU 模型 - # # ex_GRU(df) - - # 发送邮件 - # m = SendMail( - # username=username, - # passwd=passwd, - # recv=recv, - # title=title, - # content=content, - # file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime), - # ssl=ssl, - # ) - # m.send_mail() + global_config['logger'].error(f"预测过程中发生错误: {str(e)}") + raise + finally: + # 确保数据库连接被关闭 + try: + if global_config['db_mysql'].is_connected(): + global_config['db_mysql'].close() + global_config['logger'].info("数据库连接已关闭") + except Exception as e: + global_config['logger'].error(f"关闭数据库连接时发生错误: {str(e)}") if __name__ == '__main__': # global end_time # 遍历2024-11-25 到 2024-12-3 之间的工作日日期 - # for i_time in pd.date_range('2025-7-24', '2025-8-12', freq='B'): + # for i_time in pd.date_range('2025-8-14', '2025-9-1', freq='B'): # try: # global_config['end_time'] = i_time.strftime('%Y-%m-%d') # global_config['db_mysql'].connect() @@ -547,7 +577,29 @@ if __name__ == '__main__': # logger.info(f'预测失败:{e}') # continue - # global_config['end_time'] = '2025-08-14' + parser = argparse.ArgumentParser() + parser.add_argument('--end_time', type=str, default='2025-10-01') + args = parser.parse_args() + global_config['end_time'] = args.end_time + + + # 交易日检查 + # 将 end_time 转换为 YYYYMMDD 格式 + end_time_str = global_config['end_time'].replace('-', '') + logger.info(f"开始检查预测日期 {global_config['end_time']} 是否为交易日") + + # 使用 
global_config 中的交易日检查参数 + is_trading = check_trading_day(end_time_str, global_config) + if not is_trading: + logger.info(f"预测日期 {global_config['end_time']} 不是交易日,跳过预测任务") + exit(0) + else: + logger.info(f"预测日期 {global_config['end_time']} 是交易日,开始执行预测任务") + except Exception as e: + logger.error(f"交易日检查失败: {str(e)}") + logger.info("由于交易日检查失败,继续执行预测任务") + + predict_main() # push_market_value() # sql_inset_predict(global_config) diff --git a/main_juxiting_zhoudu.py b/main_juxiting_zhoudu.py index defda09..b3ad693 100644 --- a/main_juxiting_zhoudu.py +++ b/main_juxiting_zhoudu.py @@ -1,13 +1,14 @@ -# 读取配置 +# 读取配置 from lib.dataread import * from config_juxiting_zhoudu import * from lib.tools import SendMail, exception_logger, convert_df_to_pydantic_pp, exception_logger, find_best_models, get_modelsname from models.nerulforcastmodels import ex_Model_Juxiting, model_losss_juxiting, pp_export_pdf import datetime -import torch -torch.set_float32_matmul_precision("high") -torch.set_num_threads(4) +import argparse +# import torch +# torch.set_float32_matmul_precision("high") +# torch.set_num_threads(4) global_config.update({ # 核心参数 @@ -60,6 +61,10 @@ global_config.update({ 'upload_data': upload_data, 'upload_warning_url': upload_warning_url, 'warning_data': warning_data, + 'tqbz_login_url': tqbz_login_url, + 'tqbz_login_data': tqbz_login_data, + 'is_trading_day_data': is_trading_day_data, + 'query_is_trading_day_url': query_is_trading_day_url, # 查询接口 'query_data_list_item_nos_url': query_data_list_item_nos_url, @@ -88,7 +93,6 @@ global_config.update({ 'edbdatapushurl': edbdatapushurl, 'edbdeleteurl': edbdeleteurl, 'edbbusinessurl': edbbusinessurl, - 'ClassifyId': ClassifyId, 'classifylisturl': classifylisturl, # 数据库配置 @@ -230,12 +234,11 @@ def sql_inset_predict(global_config): affected_rows = config.db_mysql.execute_batch_insert( insert_query, params_list) config.logger.info(f"成功插入或更新 {affected_rows} 条记录") - config.db_mysql.close() - def predict_main(): """ 主预测函数,用于从 ETA 获取数据、处理数据、训练模型并进行预测。 + 参数: signature (BinanceAPI): Binance API 实例。 @@ -272,246 +275,271 @@ def predict_main(): 返回: None """ - end_time = global_config['end_time'] - signature = BinanceAPI(APPID, SECRET) - etadata = EtaReader(signature=signature, - classifylisturl=global_config['classifylisturl'], - classifyidlisturl=global_config['classifyidlisturl'], - edbcodedataurl=global_config['edbcodedataurl'], - edbcodelist=global_config['edbcodelist'], - edbdatapushurl=global_config['edbdatapushurl'], - edbdeleteurl=global_config['edbdeleteurl'], - edbbusinessurl=global_config['edbbusinessurl'], - classifyId=global_config['ClassifyId'], - ) - # 获取数据 - if is_eta: - logger.info('从eta获取数据...') - df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_pp_data( - data_set=data_set, dataset=dataset) # 原始数据,未处理 + # 确保数据库连接 + try: + if not global_config['db_mysql'].is_connected(): + global_config['logger'].info("数据库连接已断开,正在重新连接...") + global_config['db_mysql'].connect() + global_config['logger'].info("数据库连接正常") + except Exception as e: + global_config['logger'].error(f"数据库连接失败: {str(e)}") + return + + try: + end_time = global_config['end_time'] + signature = BinanceAPI(APPID, SECRET) + etadata = EtaReader(signature=signature, + classifylisturl=global_config['classifylisturl'], + classifyidlisturl=global_config['classifyidlisturl'], + edbcodedataurl=global_config['edbcodedataurl'], + edbcodelist=global_config['edbcodelist'], + edbdatapushurl=global_config['edbdatapushurl'], + edbdeleteurl=global_config['edbdeleteurl'], + 
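# NOTE: in the main_juxiting_yuedu.py __main__ above, the added
# "except Exception as e" has no matching "try" among the added lines, so the
# hunk as shown is a SyntaxError. The log messages suggest the intended shape:
# attempt the check, skip the run on a non-trading day, and fall through to
# the forecast if the check itself errors. Sketch with names from the diff;
# SystemExit raised by sys.exit is not caught by "except Exception":
import sys

try:
    is_trading = check_trading_day(end_time_str, global_config)
    if not is_trading:
        logger.info(f"{global_config['end_time']} is not a trading day, skipping")
        sys.exit(0)
except Exception as e:
    logger.error(f"trading-day check failed: {e}")
    logger.info("check failed, running the forecast anyway")
predict_main()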
edbbusinessurl=global_config['edbbusinessurl'], + classifyId=global_config['ClassifyId'], + ) + # 获取数据 + if is_eta: + logger.info('从eta获取数据...') - if is_market: - logger.info('从市场信息平台获取数据...') - try: - # 如果是测试环境,最高价最低价取excel文档 - if server_host == '192.168.100.53': - logger.info('从excel文档获取市场信息平台指标') - df_zhibiaoshuju = get_shujuxiang_data(df_zhibiaoshuju) - else: - logger.info('从市场信息平台获取数据') - df_zhibiaoshuju = get_market_data( - end_time, df_zhibiaoshuju) + df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_pp_data( + data_set=data_set, dataset=dataset) # 原始数据,未处理 - except: - logger.info('市场信息平台数据项-eta数据项 拼接失败') - - # 保存到xlsx文件的sheet表 - with pd.ExcelWriter(os.path.join(dataset, data_set)) as file: - df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False) - df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False) - - # 数据处理 - df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, - end_time=end_time) - - else: - # 读取数据 - logger.info('读取本地数据:' + os.path.join(dataset, data_set)) - df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, - is_timefurture=is_timefurture, end_time=end_time) # 原始数据,未处理 - - # 更改预测列名称 - df.rename(columns={y: 'y'}, inplace=True) - - if is_edbnamelist: - df = df[edbnamelist] - df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False) - # 保存最新日期的y值到数据库 - # 取第一行数据存储到数据库中 - first_row = df[['ds', 'y']].tail(1) - # 判断y的类型是否为float - if not isinstance(first_row['y'].values[0], float): - logger.info(f'{end_time}预测目标数据为空,跳过') - return None - - # 将最新真实值保存到数据库 - if not sqlitedb.check_table_exists('trueandpredict'): - first_row.to_sql('trueandpredict', sqlitedb.connection, index=False) - else: - for row in first_row.itertuples(index=False): - row_dict = row._asdict() - config.logger.info(f'要保存的真实值:{row_dict}') - # 判断ds是否为字符串类型,如果不是则转换为字符串类型 - if isinstance(row_dict['ds'], (pd.Timestamp, datetime.datetime)): - row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') - elif not isinstance(row_dict['ds'], str): + if is_market: + logger.info('从市场信息平台获取数据...') try: - row_dict['ds'] = pd.to_datetime( - row_dict['ds']).strftime('%Y-%m-%d') + # 如果是测试环境,最高价最低价取excel文档 + if server_host == '192.168.100.53': + logger.info('从excel文档获取市场信息平台指标') + df_zhibiaoshuju = get_shujuxiang_data(df_zhibiaoshuju) + else: + logger.info('从市场信息平台获取数据') + df_zhibiaoshuju = get_market_data( + end_time, df_zhibiaoshuju) + except: - logger.warning(f"无法解析的时间格式: {row_dict['ds']}") - # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') - # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S') - check_query = sqlitedb.select_data( - 'trueandpredict', where_condition=f"ds = '{row.ds}'") - if len(check_query) > 0: - set_clause = ", ".join( - [f"{key} = '{value}'" for key, value in row_dict.items()]) - sqlitedb.update_data( - 'trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'") - continue - sqlitedb.insert_data('trueandpredict', tuple( - row_dict.values()), columns=row_dict.keys()) + logger.info('市场信息平台数据项-eta数据项 拼接失败') - # 更新accuracy表的y值 - if not sqlitedb.check_table_exists('accuracy'): - pass - else: - update_y = sqlitedb.select_data( - 'accuracy', where_condition="y is null") - if len(update_y) > 0: - logger.info('更新accuracy表的y值') - # 找到update_y 中ds且df中的y的行 - update_y = update_y[update_y['ds'] <= end_time] - logger.info(f'要更新y的信息:{update_y}') - # try: - for row in update_y.itertuples(index=False): - try: - row_dict = 
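# NOTE: this is now the third near-identical predict_main in the patch (the
# daily, monthly and weekly mains differ mainly in config and model entry
# point). A shared driver would shrink the duplication; minimal sketch, all
# names illustrative:
def run_forecast(cfg: dict, fetch, train) -> None:
    """fetch(cfg) returns a prepared frame or None; train(df, cfg) fits it."""
    df = fetch(cfg)
    if df is None or len(df) == 0:
        cfg["logger"].info(f"no usable data for {cfg['end_time']}, skipping")
        return
    train(df, cfg)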
row._asdict() - yy = df[df['ds'] == row_dict['ds']]['y'].values[0] - LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0] - HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0] + # 保存到xlsx文件的sheet表 + with pd.ExcelWriter(os.path.join(dataset, data_set)) as file: + df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False) + df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False) + + # 数据处理 + df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, + end_time=end_time) + + else: + # 读取数据 + logger.info('读取本地数据:' + os.path.join(dataset, data_set)) + df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, + is_timefurture=is_timefurture, end_time=end_time) # 原始数据,未处理 + + # 更改预测列名称 + df.rename(columns={y: 'y'}, inplace=True) + + if is_edbnamelist: + df = df[edbnamelist] + df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False) + # 保存最新日期的y值到数据库 + # 取第一行数据存储到数据库中 + first_row = df[['ds', 'y']].tail(1) + # 判断y的类型是否为float + if not isinstance(first_row['y'].values[0], float): + logger.info(f'{end_time}预测目标数据为空,跳过') + return None + + # 将最新真实值保存到数据库 + if not sqlitedb.check_table_exists('trueandpredict'): + first_row.to_sql('trueandpredict', sqlitedb.connection, index=False) + else: + for row in first_row.itertuples(index=False): + row_dict = row._asdict() + config.logger.info(f'要保存的真实值:{row_dict}') + # 判断ds是否为字符串类型,如果不是则转换为字符串类型 + if isinstance(row_dict['ds'], (pd.Timestamp, datetime.datetime)): + row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') + elif not isinstance(row_dict['ds'], str): + try: + row_dict['ds'] = pd.to_datetime( + row_dict['ds']).strftime('%Y-%m-%d') + except: + logger.warning(f"无法解析的时间格式: {row_dict['ds']}") + # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') + # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S') + check_query = sqlitedb.select_data( + 'trueandpredict', where_condition=f"ds = '{row.ds}'") + if len(check_query) > 0: + set_clause = ", ".join( + [f"{key} = '{value}'" for key, value in row_dict.items()]) sqlitedb.update_data( - 'accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'") - except: - logger.info(f'更新accuracy表的y值失败:{row_dict}') - # except Exception as e: - # logger.info(f'更新accuracy表的y值失败:{e}') + 'trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'") + continue + sqlitedb.insert_data('trueandpredict', tuple( + row_dict.values()), columns=row_dict.keys()) - # 判断当前日期是不是周一 - is_weekday = datetime.datetime.now().weekday() == 0 - # if is_weekday: - # logger.info('今天是周一,更新预测模型') - # # 计算最近60天预测残差最低的模型名称 - # model_results = sqlitedb.select_data( - # 'trueandpredict', order_by="ds DESC", limit="60") - # # 删除空值率为90%以上的列 - # if len(model_results) > 10: - # model_results = model_results.dropna( - # thresh=len(model_results)*0.1, axis=1) - # # 删除空行 - # model_results = model_results.dropna() - # modelnames = model_results.columns.to_list()[2:-2] - # for col in model_results[modelnames].select_dtypes(include=['object']).columns: - # model_results[col] = model_results[col].astype(np.float32) - # # 计算每个预测值与真实值之间的偏差率 - # for model in modelnames: - # model_results[f'{model}_abs_error_rate'] = abs( - # model_results['y'] - model_results[model]) / model_results['y'] - # # 获取每行对应的最小偏差率值 - # min_abs_error_rate_values = model_results.apply( - # lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), 
axis=1) - # # 获取每行对应的最小偏差率值对应的列名 - # min_abs_error_rate_column_name = model_results.apply( - # lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) - # # 将列名索引转换为列名 - # min_abs_error_rate_column_name = min_abs_error_rate_column_name.map( - # lambda x: x.split('_')[0]) - # # 取出现次数最多的模型名称 - # most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() - # logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}") - # # 保存结果到数据库 - # if not sqlitedb.check_table_exists('most_model'): - # sqlitedb.create_table( - # 'most_model', columns="ds datetime, most_common_model TEXT") - # sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime( - # '%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',)) + # 更新accuracy表的y值 + if not sqlitedb.check_table_exists('accuracy'): + pass + else: + update_y = sqlitedb.select_data( + 'accuracy', where_condition="y is null") + if len(update_y) > 0: + logger.info('更新accuracy表的y值') + # 找到update_y 中ds且df中的y的行 + update_y = update_y[update_y['ds'] <= end_time] + logger.info(f'要更新y的信息:{update_y}') + # try: + for row in update_y.itertuples(index=False): + try: + row_dict = row._asdict() + yy = df[df['ds'] == row_dict['ds']]['y'].values[0] + LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0] + HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0] + sqlitedb.update_data( + 'accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'") + except: + # logger.info(f'更新accuracy表的y值失败:{row_dict}') + pass + # except Exception as e: + # logger.info(f'更新accuracy表的y值失败:{e}') - if is_corr: - df = corr_feature(df=df) + # 判断当前日期是不是周一 + is_weekday = datetime.datetime.now().weekday() == 0 + # if is_weekday: + # logger.info('今天是周一,更新预测模型') + # # 计算最近60天预测残差最低的模型名称 + # model_results = sqlitedb.select_data( + # 'trueandpredict', order_by="ds DESC", limit="60") + # # 删除空值率为90%以上的列 + # if len(model_results) > 10: + # model_results = model_results.dropna( + # thresh=len(model_results)*0.1, axis=1) + # # 删除空行 + # model_results = model_results.dropna() + # modelnames = model_results.columns.to_list()[2:-2] + # for col in model_results[modelnames].select_dtypes(include=['object']).columns: + # model_results[col] = model_results[col].astype(np.float32) + # # 计算每个预测值与真实值之间的偏差率 + # for model in modelnames: + # model_results[f'{model}_abs_error_rate'] = abs( + # model_results['y'] - model_results[model]) / model_results['y'] + # # 获取每行对应的最小偏差率值 + # min_abs_error_rate_values = model_results.apply( + # lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1) + # # 获取每行对应的最小偏差率值对应的列名 + # min_abs_error_rate_column_name = model_results.apply( + # lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) + # # 将列名索引转换为列名 + # min_abs_error_rate_column_name = min_abs_error_rate_column_name.map( + # lambda x: x.split('_')[0]) + # # 取出现次数最多的模型名称 + # most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() + # logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}") + # # 保存结果到数据库 + # if not sqlitedb.check_table_exists('most_model'): + # sqlitedb.create_table( + # 'most_model', columns="ds datetime, most_common_model TEXT") + # sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime( + # '%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',)) - df1 = df.copy() # 备份一下,后面特征筛选完之后加入ds y 列用 - logger.info(f"开始训练模型...") - row, col = df.shape + if is_corr: + df = 
corr_feature(df=df) - now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') - ex_Model_Juxiting(df, - horizon=global_config['horizon'], - input_size=global_config['input_size'], - train_steps=global_config['train_steps'], - val_check_steps=global_config['val_check_steps'], - early_stop_patience_steps=global_config['early_stop_patience_steps'], - is_debug=global_config['is_debug'], - dataset=global_config['dataset'], - is_train=global_config['is_train'], - is_fivemodels=global_config['is_fivemodels'], - val_size=global_config['val_size'], - test_size=global_config['test_size'], - settings=global_config['settings'], - now=now, - etadata=etadata, - modelsindex=global_config['modelsindex'], - data=data, - is_eta=global_config['is_eta'], - end_time=global_config['end_time'], - ) + df1 = df.copy() # 备份一下,后面特征筛选完之后加入ds y 列用 + logger.info(f"开始训练模型...") + row, col = df.shape - logger.info('模型训练完成') + now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') + ex_Model_Juxiting(df, + horizon=global_config['horizon'], + input_size=global_config['input_size'], + train_steps=global_config['train_steps'], + val_check_steps=global_config['val_check_steps'], + early_stop_patience_steps=global_config['early_stop_patience_steps'], + is_debug=global_config['is_debug'], + dataset=global_config['dataset'], + is_train=global_config['is_train'], + is_fivemodels=global_config['is_fivemodels'], + val_size=global_config['val_size'], + test_size=global_config['test_size'], + settings=global_config['settings'], + now=now, + etadata=etadata, + modelsindex=global_config['modelsindex'], + data=data, + is_eta=global_config['is_eta'], + end_time=global_config['end_time'], + ) - # logger.info('训练数据绘图ing') - # model_results3 = model_losss_juxiting( - # sqlitedb, end_time=global_config['end_time'], is_fivemodels=global_config['is_fivemodels']) - # logger.info('训练数据绘图end') + logger.info('模型训练完成') - # # # 模型报告 - # logger.info('制作报告ing') - # title = f'{settings}--{end_time}-预测报告' # 报告标题 - # reportname = f'聚烯烃PP大模型周度预测--{end_time}.pdf' # 报告文件名 - # reportname = reportname.replace(':', '-') # 替换冒号 - # pp_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time, - # reportname=reportname, sqlitedb=sqlitedb), + # logger.info('训练数据绘图ing') + # model_results3 = model_losss_juxiting( + # sqlitedb, end_time=global_config['end_time'], is_fivemodels=global_config['is_fivemodels']) + # logger.info('训练数据绘图end') - # logger.info('制作报告end') + # # # 模型报告 + # logger.info('制作报告ing') + # title = f'{settings}--{end_time}-预测报告' # 报告标题 + # reportname = f'聚烯烃PP大模型周度预测--{end_time}.pdf' # 报告文件名 + # reportname = reportname.replace(':', '-') # 替换冒号 + # pp_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time, + # reportname=reportname, sqlitedb=sqlitedb), - try: - push_market_value() - logger.info('推送市场值完成') + # logger.info('制作报告end') + + try: + push_market_value() + logger.info('推送市场值完成') + except Exception as e: + logger.info(f'推送市场值失败:{e}') + + try: + sql_inset_predict(global_config) + logger.info('插入预测数据完成') + except Exception as e: + logger.info(f'插入预测数据失败:{e}') + + # # LSTM 单变量模型 + # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset) + + # # lstm 多变量模型 + # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset) + + # # GRU 模型 + # # ex_GRU(df) + + # 发送邮件 + # m = SendMail( + # username=username, + # passwd=passwd, + # recv=recv, + # title=title, + # content=content, + # file=max(glob.glob(os.path.join(dataset,'*.pdf')), 
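# NOTE: push_market_value and sql_inset_predict above each get an identical
# try/log wrapper. One helper states the policy (fail soft, keep the pipeline
# moving) in a single place; sketch, helper name illustrative:
def run_step(name: str, fn, logger) -> bool:
    try:
        fn()
        logger.info(f"{name} done")
        return True
    except Exception as exc:
        logger.info(f"{name} failed: {exc}")
        return False

# run_step("push market value", push_market_value, logger)
# run_step("insert predictions", lambda: sql_inset_predict(global_config), logger)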
key=os.path.getctime), + # ssl=ssl, + # ) + # m.send_mail() + except Exception as e: - logger.info(f'推送市场值失败:{e}') - - try: - sql_inset_predict(global_config) - logger.info('插入预测数据完成') - except Exception as e: - logger.info(f'插入预测数据失败:{e}') - - # # LSTM 单变量模型 - # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset) - - # # lstm 多变量模型 - # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset) - - # # GRU 模型 - # # ex_GRU(df) - - # 发送邮件 - # m = SendMail( - # username=username, - # passwd=passwd, - # recv=recv, - # title=title, - # content=content, - # file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime), - # ssl=ssl, - # ) - # m.send_mail() + global_config['logger'].error(f"预测过程中发生错误: {str(e)}") + raise + finally: + # 确保数据库连接被关闭 + try: + if global_config['db_mysql'].is_connected(): + global_config['db_mysql'].close() + global_config['logger'].info("数据库连接已关闭") + except Exception as e: + global_config['logger'].error(f"关闭数据库连接时发生错误: {str(e)}") if __name__ == '__main__': # global end_time # 遍历2024-11-25 到 2024-12-3 之间的工作日日期 - # for i_time in pd.date_range('2025-3-3', '2025-5-30', freq='B'): + # for i_time in pd.date_range('2025-8-14', '2025-9-1', freq='B'): # try: # global_config['end_time'] = i_time.strftime('%Y-%m-%d') # global_config['db_mysql'].connect() @@ -520,8 +548,24 @@ if __name__ == '__main__': # logger.info(f'预测失败:{e}') # continue + parser = argparse.ArgumentParser() + parser.add_argument('--end_time', type=str, default='2025-10-07') + args = parser.parse_args() + global_config['end_time'] = args.end_time - # global_config['end_time'] = '2025-08-14' + + # 交易日检查 + # 将 end_time 转换为 YYYYMMDD 格式 + end_time_str = global_config['end_time'].replace('-', '') + logger.info(f"开始检查预测日期 {global_config['end_time']} 是否为交易日") + + # 使用 global_config 中的交易日检查参数 + is_trading = check_trading_day(end_time_str, global_config) + if not is_trading: + logger.info(f"预测日期 {global_config['end_time']} 不是交易日,跳过预测任务") + exit(0) + else: + logger.info(f"预测日期 {global_config['end_time']} 是交易日,开始执行预测任务") predict_main() diff --git a/main_yuanyou.py b/main_yuanyou.py index ee2f185..3d47d31 100644 --- a/main_yuanyou.py +++ b/main_yuanyou.py @@ -1,5 +1,6 @@ # 读取配置 +import argparse from lib.dataread import * from config_jingbo import * from lib.tools import SendMail, convert_df_to_pydantic, exception_logger, get_modelsname @@ -72,6 +73,12 @@ global_config.update({ 'get_waring_data_value_list_url': get_waring_data_value_list_url, 'get_waring_data_value_list_data': get_waring_data_value_list_data, + # 套期保值api + "tqbz_login_url":tqbz_login_url, + "tqbz_login_data":tqbz_login_data, + 'query_is_trading_day_url': query_is_trading_day_url, + 'is_trading_day_data': is_trading_day_data, + # eta 配置 'APPID': APPID, 'SECRET': SECRET, @@ -234,348 +241,339 @@ def sql_inset_predict(global_config): affected_rows = config.db_mysql.execute_batch_insert( insert_query, params_list) config.logger.info(f"成功插入或更新 {affected_rows} 条记录") - config.db_mysql.close() def predict_main(): """ 主预测函数,用于从 ETA 获取数据、处理数据、训练模型并进行预测。 - - 参数: - signature (BinanceAPI): Binance API 实例。 - etadata (EtaReader): ETA 数据读取器实例。 - is_eta (bool): 是否从 ETA 获取数据。 - data_set (str): 数据集名称。 - dataset (str): 数据集路径。 - add_kdj (bool): 是否添加 KDJ 指标。 - is_timefurture (bool): 是否添加时间衍生特征。 - end_time (str): 结束时间。 - is_edbnamelist (bool): 是否使用 EDB 名称列表。 - edbnamelist (list): EDB 名称列表。 - y (str): 预测目标列名。 - sqlitedb (SQLiteDB): SQLite 数据库实例。 - is_corr (bool): 是否进行相关性分析。 - horizon (int): 预测时域。 - 
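# NOTE: across all four files this patch deletes db_mysql.close() from
# sql_inset_predict and moves closing into predict_main's finally block, so a
# helper no longer tears down a connection that later steps still need. The
# resulting lifecycle as a context manager; sketch, assuming the diff's
# is_connected()/connect()/close() interface:
import contextlib

@contextlib.contextmanager
def mysql_session(db, logger):
    if not db.is_connected():
        db.connect()
    try:
        yield db
    finally:
        with contextlib.suppress(Exception):
            if db.is_connected():
                db.close()
                logger.info("mysql connection closed")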
input_size (int): 输入数据大小。 - train_steps (int): 训练步数。 - val_check_steps (int): 验证检查步数。 - early_stop_patience_steps (int): 早停耐心步数。 - is_debug (bool): 是否调试模式。 - dataset (str): 数据集名称。 - is_train (bool): 是否训练模型。 - is_fivemodels (bool): 是否使用五个模型。 - val_size (float): 验证集大小。 - test_size (float): 测试集大小。 - settings (dict): 模型设置。 - now (str): 当前时间。 - etadata (EtaReader): ETA 数据读取器实例。 - modelsindex (list): 模型索引列表。 - data (str): 数据类型。 - is_eta (bool): 是否从 ETA 获取数据。 - - 返回: - None """ - end_time = global_config['end_time'] - signature = BinanceAPI(APPID, SECRET) - etadata = EtaReader(signature=signature, - classifylisturl=global_config['classifylisturl'], - classifyidlisturl=global_config['classifyidlisturl'], - edbcodedataurl=global_config['edbcodedataurl'], - edbcodelist=global_config['edbcodelist'], - edbdatapushurl=global_config['edbdatapushurl'], - edbdeleteurl=global_config['edbdeleteurl'], - edbbusinessurl=global_config['edbbusinessurl'], - classifyId=global_config['ClassifyId'], - ) - # 获取数据 - if is_eta: - logger.info('从eta获取数据...') - - df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data( - data_set=data_set, dataset=dataset) # 原始数据,未处理 - - if is_market: - logger.info('从市场信息平台获取数据...') - try: - # 如果是测试环境,最高价最低价取excel文档 - if server_host == '192.168.100.53': - logger.info('从excel文档获取最高价最低价') - df_zhibiaoshuju = get_high_low_data(df_zhibiaoshuju) - else: - logger.info('从市场信息平台获取数据') - df_zhibiaoshuju = get_market_data( - end_time, df_zhibiaoshuju) - - except: - logger.info('最高最低价拼接失败') - - # 保存到xlsx文件的sheet表 - with pd.ExcelWriter(os.path.join(dataset, data_set)) as file: - df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False) - df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False) - - # 数据处理 - df = datachuli(df_zhibiaoshuju, df_zhibiaoliebiao, y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, - end_time=end_time) - - else: - # 读取数据 - logger.info('读取本地数据:' + os.path.join(dataset, data_set)) - df, df_zhibiaoliebiao = getdata(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, - is_timefurture=is_timefurture, end_time=end_time) # 原始数据,未处理 - - # 更改预测列名称 - df.rename(columns={y: 'y'}, inplace=True) - - if is_edbnamelist: - df = df[edbnamelist] - df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False) - # 保存最新日期的y值到数据库 - # 取第一行数据存储到数据库中 - first_row = df[['ds', 'y']].tail(1) - # 判断y的类型是否为float - if not isinstance(first_row['y'].values[0], float): - logger.info(f'{end_time}预测目标数据为空,跳过') - return None - - # 将最新真实值保存到数据库 - if not sqlitedb.check_table_exists('trueandpredict'): - first_row.to_sql('trueandpredict', sqlitedb.connection, index=False) - else: - for row in first_row.itertuples(index=False): - row_dict = row._asdict() - config.logger.info(f'要保存的真实值:{row_dict}') - # 判断ds是否为字符串类型,如果不是则转换为字符串类型 - if isinstance(row_dict['ds'], (pd.Timestamp, datetime.datetime)): - row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') - elif not isinstance(row_dict['ds'], str): - try: - row_dict['ds'] = pd.to_datetime( - row_dict['ds']).strftime('%Y-%m-%d') - except: - logger.warning(f"无法解析的时间格式: {row_dict['ds']}") - # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d') - # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S') - check_query = sqlitedb.select_data( - 'trueandpredict', where_condition=f"ds = '{row.ds}'") - if len(check_query) > 0: - set_clause = ", ".join( - [f"{key} = '{value}'" for key, value in row_dict.items()]) - sqlitedb.update_data( - 'trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'") - 
continue - sqlitedb.insert_data('trueandpredict', tuple( - row_dict.values()), columns=row_dict.keys()) - - # 更新accuracy表的y值 - if not sqlitedb.check_table_exists('accuracy'): - pass - else: - update_y = sqlitedb.select_data( - 'accuracy', where_condition="y is null") - if len(update_y) > 0: - logger.info('更新accuracy表的y值') - # 找到update_y 中ds且df中的y的行 - update_y = update_y[update_y['ds'] <= end_time] - logger.info(f'要更新y的信息:{update_y}') - # try: - for row in update_y.itertuples(index=False): - try: - row_dict = row._asdict() - yy = df[df['ds'] == row_dict['ds']]['y'].values[0] - LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0] - HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0] - sqlitedb.update_data( - 'accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'") - except: - logger.info(f'更新accuracy表的y值失败:{row_dict}') - # except Exception as e: - # logger.info(f'更新accuracy表的y值失败:{e}') - - # 判断当前日期是不是周一 - is_weekday = datetime.datetime.now().weekday() == 0 - if is_weekday: - logger.info('今天是周一,更新预测模型') - # 计算最近60天预测残差最低的模型名称 - model_results = sqlitedb.select_data( - 'trueandpredict', order_by="ds DESC", limit="60") - # 删除空值率为90%以上的列 - if len(model_results) > 10: - model_results = model_results.dropna( - thresh=len(model_results)*0.1, axis=1) - # 删除空行 - model_results = model_results.dropna() - modelnames = model_results.columns.to_list()[2:-1] - for col in model_results[modelnames].select_dtypes(include=['object']).columns: - model_results[col] = model_results[col].astype(np.float32) - # 计算每个预测值与真实值之间的偏差率 - for model in modelnames: - model_results[f'{model}_abs_error_rate'] = abs( - model_results['y'] - model_results[model]) / model_results['y'] - # 获取每行对应的最小偏差率值 - min_abs_error_rate_values = model_results.apply( - lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1) - # 获取每行对应的最小偏差率值对应的列名 - min_abs_error_rate_column_name = model_results.apply( - lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) - # 将列名索引转换为列名 - min_abs_error_rate_column_name = min_abs_error_rate_column_name.map( - lambda x: x.split('_')[0]) - # 取出现次数最多的模型名称 - most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() - logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}") - # 保存结果到数据库 - if not sqlitedb.check_table_exists('most_model'): - sqlitedb.create_table( - 'most_model', columns="ds datetime, most_common_model TEXT") - sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime( - '%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',)) - - # try: - # # if is_weekday: - # if True: - # logger.info('今天是周一,发送特征预警') - # # 上传预警信息到数据库 - # warning_data_df = df_zhibiaoliebiao.copy() - # warning_data_df = warning_data_df[warning_data_df['停更周期'] > 3][[ - # '指标名称', '指标id', '频度', '更新周期', '指标来源', '最后更新时间', '停更周期']] - # # 重命名列名 - # warning_data_df = warning_data_df.rename(columns={'指标名称': 'INDICATOR_NAME', '指标id': 'INDICATOR_ID', '频度': 'FREQUENCY', - # '更新周期': 'UPDATE_FREQUENCY', '指标来源': 'DATA_SOURCE', '最后更新时间': 'LAST_UPDATE_DATE', '停更周期': 'UPDATE_SUSPENSION_CYCLE'}) - # from sqlalchemy import create_engine - # import urllib - # global password - # if '@' in password: - # password = urllib.parse.quote_plus(password) - - # engine = create_engine( - # f'mysql+pymysql://{dbusername}:{password}@{host}:{port}/{dbname}') - # warning_data_df['WARNING_DATE'] = datetime.date.today().strftime( - # "%Y-%m-%d %H:%M:%S") - # warning_data_df['TENANT_CODE'] = 'T0004' - # 
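# NOTE: the UPDATE above formats floats straight into the SQL text, so a NaN
# renders as the bare token nan and the statement fails, which the broad
# except then hides. Parameter binding avoids that and the manual quoting of
# ds; plain sqlite3 sketch with column names from the diff:
import math
import sqlite3

def update_accuracy(conn: sqlite3.Connection, ds: str, y, low, high) -> None:
    clean = [None if v is None or (isinstance(v, float) and math.isnan(v)) else v
             for v in (y, low, high)]
    conn.execute(
        "UPDATE accuracy SET y = ?, LOW_PRICE = ?, HIGH_PRICE = ? WHERE ds = ?",
        (*clean, ds))
    conn.commit()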
-    #         # 插入数据之前查询表数据然后新增id列
-    #         existing_data = pd.read_sql(f"SELECT * FROM {table_name}", engine)
-    #         if not existing_data.empty:
-    #             max_id = existing_data['ID'].astype(int).max()
-    #             warning_data_df['ID'] = range(
-    #                 max_id + 1, max_id + 1 + len(warning_data_df))
-    #         else:
-    #             warning_data_df['ID'] = range(1, 1 + len(warning_data_df))
-    #         warning_data_df.to_sql(
-    #             table_name, con=engine, if_exists='append', index=False)
-    #         if is_update_warning_data:
-    #             upload_warning_info(len(warning_data_df))
-    # except:
-    #     logger.info('上传预警信息到数据库失败')
-
+    # 确保数据库连接
     try:
-        # if is_weekday:
-        if True:
-            logger.info('发送特征预警')
-            # 获取取消订阅的指标ID
-            quxiaodingyueidlist = get_waring_data()
-            # 上传预警信息到数据库
-            warning_data_df = df_zhibiaoliebiao.copy()
-            warning_data_df = warning_data_df[warning_data_df['停更周期'] > 3][[
-                '指标名称', '指标id', '频度', '更新周期', '指标来源', '最后更新时间', '停更周期']]
-            # 重命名列名
-            warning_data_df = warning_data_df.rename(columns={'指标名称': 'indicatorName', '指标id': 'indicatorId', '频度': 'frequency',
-                                                              '更新周期': 'UPDATE_FREQUENCY', '指标来源': 'DATA_SOURCE', '最后更新时间': 'LAST_UPDATE_DATE', '停更周期': 'updateSuspensionCycle'})
+        if global_config['db_mysql'] is None:
+            global_config['logger'].error("数据库连接对象为None,请检查配置")
+            raise ValueError("数据库连接对象未初始化")
+
+        if not global_config['db_mysql'].is_connected():
+            global_config['logger'].info("数据库连接已断开,正在重新连接...")
+            global_config['db_mysql'].connect()
+        global_config['logger'].info("数据库连接正常")
+    except Exception as e:
+        global_config['logger'].error(f"数据库连接失败: {str(e)}")
+        exit(1)
+
+    try:
+        end_time = global_config['end_time']
+        signature = BinanceAPI(APPID, SECRET)
+        etadata = EtaReader(signature=signature,
+                            classifylisturl=global_config['classifylisturl'],
+                            classifyidlisturl=global_config['classifyidlisturl'],
+                            edbcodedataurl=global_config['edbcodedataurl'],
+                            edbcodelist=global_config['edbcodelist'],
+                            edbdatapushurl=global_config['edbdatapushurl'],
+                            edbdeleteurl=global_config['edbdeleteurl'],
+                            edbbusinessurl=global_config['edbbusinessurl'],
+                            classifyId=global_config['ClassifyId'],
+                            )
+        # 获取数据
+        if is_eta:
+            logger.info('从eta获取数据...')
-            warning_data_df['warningDate'] = datetime.date.today().strftime(
-                "%Y-%m-%d %H:%M:%S")
-            warning_data_df['dataSource'] = 8
-            # 去掉取消订阅的指标
-            print(warning_data_df.shape)
-            warning_data_df = warning_data_df[~warning_data_df['indicatorId'].isin(
-                quxiaodingyueidlist)]
-            print(warning_data_df.shape)
-            warning_data = warning_data_df.to_json(
-                orient='records', force_ascii=False)
-            warning_data = warning_data.replace('日度', '1')
-            warning_data = warning_data.replace('周度', '2')
-            warning_data = warning_data.replace('月度', '3')
-            warning_data = json.loads(warning_data)
-            push_waring_market_data(warning_data)
-            # if is_update_warning_data:
-            #     upload_warning_info(len(warning_data_df))
-    except:
-        logger.info('上传预警信息到数据库失败')
+            df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(
+                data_set=data_set, dataset=dataset)  # 原始数据,未处理
-    if is_corr:
-        df = corr_feature(df=df)
+        if is_market:
+            logger.info('从市场信息平台获取数据...')
+            try:
+                # 如果是测试环境,最高价最低价取excel文档
+                if server_host == '192.168.100.53':
+                    logger.info('从excel文档获取最高价最低价')
+                    df_zhibiaoshuju = get_high_low_data(df_zhibiaoshuju)
+                else:
+                    logger.info('从市场信息平台获取数据')
+                    df_zhibiaoshuju = get_market_data(
+                        end_time, df_zhibiaoshuju)
-    df1 = df.copy()  # 备份一下,后面特征筛选完之后加入ds y 列用
-    logger.info(f"开始训练模型...")
-    row, col = df.shape
+            except Exception as e:
+                logger.info(f'最高最低价拼接失败:{e}')
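A minimal sketch, not part of the patch: the "ensure DB connection" block added above is repeated verbatim in predict_main and in the __main__ of all three entry scripts, so it could be factored into one helper. The name ensure_mysql_connection and the global_config layout are assumptions of this example; is_connected() and connect() are the methods the patch itself uses.

# Hedged sketch: ensure_mysql_connection is a hypothetical helper name.
def ensure_mysql_connection(global_config):
    db = global_config.get('db_mysql')
    logger = global_config['logger']
    if db is None:
        logger.error('数据库连接对象为None,请检查配置')
        raise ValueError('数据库连接对象未初始化')
    if not db.is_connected():
        logger.info('数据库连接已断开,正在重新连接...')
        db.connect()
    logger.info('数据库连接正常')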
-             input_size=global_config['input_size'],
-             train_steps=global_config['train_steps'],
-             val_check_steps=global_config['val_check_steps'],
-             early_stop_patience_steps=global_config['early_stop_patience_steps'],
-             is_debug=global_config['is_debug'],
-             dataset=global_config['dataset'],
-             is_train=global_config['is_train'],
-             is_fivemodels=global_config['is_fivemodels'],
-             val_size=global_config['val_size'],
-             test_size=global_config['test_size'],
-             settings=global_config['settings'],
-             now=now,
-             etadata=etadata,
-             modelsindex=global_config['modelsindex'],
-             data=data,
-             is_eta=global_config['is_eta'],
-             end_time=global_config['end_time'],
-             )
+            # 保存到xlsx文件的sheet表
+            with pd.ExcelWriter(os.path.join(dataset, data_set)) as file:
+                df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False)
+                df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False)
-    logger.info('模型训练完成')
+            # 数据处理
+            df = datachuli(df_zhibiaoshuju, df_zhibiaoliebiao, y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture,
+                           end_time=end_time)
-    logger.info('训练数据绘图ing')
-    model_results3 = model_losss(sqlitedb, end_time=end_time)
-    logger.info('训练数据绘图end')
+        else:
+            # 读取数据
+            logger.info('读取本地数据:' + os.path.join(dataset, data_set))
+            df, df_zhibiaoliebiao = getdata(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
+                                            is_timefurture=is_timefurture, end_time=end_time)  # 原始数据,未处理
-    # 模型报告
-    logger.info('制作报告ing')
-    title = f'{settings}--{end_time}-预测报告'  # 报告标题
-    reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # 报告文件名
-    reportname = reportname.replace(':', '-')  # 替换冒号
-    brent_export_pdf(dataset=dataset,
-                     num_models=5 if is_fivemodels else 22, time=end_time,
-                     reportname=reportname,
-                     inputsize=global_config['horizon'],
-                     sqlitedb=sqlitedb
-                     ),
+        # 更改预测列名称
+        df.rename(columns={y: 'y'}, inplace=True)
-    logger.info('制作报告end')
-    logger.info('模型训练完成')
+        if is_edbnamelist:
+            df = df[edbnamelist]
+        df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False)
+        # 保存最新日期的y值到数据库
+        # 取第一行数据存储到数据库中
+        first_row = df[['ds', 'y']].tail(1)
+        # 判断y的类型是否为float
+        if not isinstance(first_row['y'].values[0], float):
+            logger.info(f'{end_time}预测目标数据为空,跳过')
+            return None
-    push_market_value()
-    sql_inset_predict(global_config)
+        # 将最新真实值保存到数据库
+        if not sqlitedb.check_table_exists('trueandpredict'):
+            first_row.to_sql('trueandpredict', sqlitedb.connection, index=False)
+        else:
+            for row in first_row.itertuples(index=False):
+                row_dict = row._asdict()
+                config.logger.info(f'要保存的真实值:{row_dict}')
+                # 判断ds是否为字符串类型,如果不是则转换为字符串类型
+                if isinstance(row_dict['ds'], (pd.Timestamp, datetime.datetime)):
+                    row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d')
+                elif not isinstance(row_dict['ds'], str):
+                    try:
+                        row_dict['ds'] = pd.to_datetime(
+                            row_dict['ds']).strftime('%Y-%m-%d')
+                    except Exception as e:
+                        logger.warning(f"无法解析的时间格式: {row_dict['ds']},错误:{e}")
+                # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d')
+                # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S')
+                check_query = sqlitedb.select_data(
+                    'trueandpredict', where_condition=f"ds = '{row.ds}'")
+                if len(check_query) > 0:
+                    set_clause = ", ".join(
+                        [f"{key} = '{value}'" for key, value in row_dict.items()])
+                    sqlitedb.update_data(
+                        'trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'")
+                    continue
+                sqlitedb.insert_data('trueandpredict', tuple(
+                    row_dict.values()), columns=row_dict.keys())
-    # # LSTM 单变量模型
-    # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
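A minimal sketch, not part of the patch: the ds-normalisation plus select-then-update-or-insert loop above could be reused for tables other than trueandpredict. normalize_ds and upsert_row are hypothetical names; the sqlitedb select_data/update_data/insert_data signatures follow the usage above.

# Hedged sketch: helper names are invented for this example.
import datetime
import pandas as pd

def normalize_ds(value):
    # Normalise ds to a 'YYYY-MM-DD' string; return unparseable values unchanged.
    if isinstance(value, (pd.Timestamp, datetime.datetime)):
        return value.strftime('%Y-%m-%d')
    if not isinstance(value, str):
        try:
            return pd.to_datetime(value).strftime('%Y-%m-%d')
        except Exception:
            return value
    return value

def upsert_row(sqlitedb, table, row_dict):
    # Update the row keyed by ds if it exists, otherwise insert it.
    row_dict['ds'] = normalize_ds(row_dict['ds'])
    exists = sqlitedb.select_data(table, where_condition=f"ds = '{row_dict['ds']}'")
    if len(exists) > 0:
        set_clause = ", ".join(f"{k} = '{v}'" for k, v in row_dict.items())
        sqlitedb.update_data(table, set_clause, where_condition=f"ds = '{row_dict['ds']}'")
    else:
        sqlitedb.insert_data(table, tuple(row_dict.values()), columns=row_dict.keys())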
+        # 更新accuracy表的y值
+        if not sqlitedb.check_table_exists('accuracy'):
+            pass
+        else:
+            update_y = sqlitedb.select_data(
+                'accuracy', where_condition="y is null")
+            if len(update_y) > 0:
+                logger.info('更新accuracy表的y值')
+                # 找到update_y 中ds且df中的y的行
+                update_y = update_y[update_y['ds'] <= end_time]
+                logger.info(f'要更新y的信息:{update_y}')
+                try:
+                    for row in update_y.itertuples(index=False):
+                        try:
+                            row_dict = row._asdict()
+                            yy = df[df['ds'] == row_dict['ds']]['y'].values[0]
+                            LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0]
+                            HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0]
+                            sqlitedb.update_data(
+                                'accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'")
+                        except Exception as e:
+                            logger.info(f'更新accuracy表的y值失败:{row_dict},错误:{e}')
+                except Exception as e:
+                    logger.info(f'更新accuracy表的y值失败:{e}')
-    # # lstm 多变量模型
-    # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset)
+        # 判断当前日期是不是周一
+        is_weekday = datetime.datetime.now().weekday() == 0
+        if is_weekday:
+            logger.info('今天是周一,更新预测模型')
+            # 计算最近60天预测残差最低的模型名称
+            model_results = sqlitedb.select_data(
+                'trueandpredict', order_by="ds DESC", limit="60")
+            # 删除空值率为90%以上的列
+            if len(model_results) > 10:
+                model_results = model_results.dropna(
+                    thresh=len(model_results)*0.1, axis=1)
+            # 删除空行
+            model_results = model_results.dropna()
+            modelnames = model_results.columns.to_list()[2:-1]
+            for col in model_results[modelnames].select_dtypes(include=['object']).columns:
+                model_results[col] = model_results[col].astype(np.float32)
+            # 计算每个预测值与真实值之间的偏差率
+            for model in modelnames:
+                model_results[f'{model}_abs_error_rate'] = abs(
+                    model_results['y'] - model_results[model]) / model_results['y']
+            # 获取每行对应的最小偏差率值
+            min_abs_error_rate_values = model_results.apply(
+                lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
+            # 获取每行对应的最小偏差率值对应的列名
+            min_abs_error_rate_column_name = model_results.apply(
+                lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
+            # 将列名索引转换为列名
+            min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(
+                lambda x: x.split('_')[0])
+            # 取出现次数最多的模型名称
+            most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
+            logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}")
+            # 保存结果到数据库
+            if not sqlitedb.check_table_exists('most_model'):
+                sqlitedb.create_table(
+                    'most_model', columns="ds datetime, most_common_model TEXT")
+            sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime(
+                '%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
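A self-contained toy example, not part of the patch, of the model-selection rule above: per row, take the model with the smallest absolute error rate, then keep the most frequent winner. The numbers and model names are invented. Note that the original x.split('_')[0] would truncate a model name that itself contains an underscore; stripping the known suffix avoids that, and idxmin on the rate columns replaces the row-wise apply.

# Hedged sketch: data and model names are fabricated for illustration.
import pandas as pd

model_results = pd.DataFrame({
    'y':            [100.0, 102.0, 101.0],
    'NHITS':        [99.0, 103.0, 99.5],
    'iTransformer': [101.5, 101.5, 101.2],
})
modelnames = ['NHITS', 'iTransformer']
for m in modelnames:
    model_results[f'{m}_abs_error_rate'] = (
        model_results['y'] - model_results[m]).abs() / model_results['y']
rate_cols = [f'{m}_abs_error_rate' for m in modelnames]
suffix = '_abs_error_rate'
# idxmin(axis=1) returns the column name of the per-row minimum.
best_per_row = model_results[rate_cols].idxmin(axis=1).map(lambda c: c[:-len(suffix)])
print(best_per_row.value_counts().idxmax())  # -> 'iTransformer'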
-    # # GRU 模型
-    # # ex_GRU(df)
+        # try:
+        #     # if is_weekday:
+        #     if True:
+        #         logger.info('今天是周一,发送特征预警')
+        #         # 上传预警信息到数据库
+        #         warning_data_df = df_zhibiaoliebiao.copy()
+        #         warning_data_df = warning_data_df[warning_data_df['停更周期'] > 3][[
+        #             '指标名称', '指标id', '频度', '更新周期', '指标来源', '最后更新时间', '停更周期']]
+        #         # 重命名列名
+        #         warning_data_df = warning_data_df.rename(columns={'指标名称': 'INDICATOR_NAME', '指标id': 'INDICATOR_ID', '频度': 'FREQUENCY',
+        #                                                           '更新周期': 'UPDATE_FREQUENCY', '指标来源': 'DATA_SOURCE', '最后更新时间': 'LAST_UPDATE_DATE', '停更周期': 'UPDATE_SUSPENSION_CYCLE'})
+        #         from sqlalchemy import create_engine
+        #         import urllib
+        #         global password
+        #         if '@' in password:
+        #             password = urllib.parse.quote_plus(password)
+
+        #         engine = create_engine(
+        #             f'mysql+pymysql://{dbusername}:{password}@{host}:{port}/{dbname}')
+        #         warning_data_df['WARNING_DATE'] = datetime.date.today().strftime(
+        #             "%Y-%m-%d %H:%M:%S")
+        #         warning_data_df['TENANT_CODE'] = 'T0004'
+        #         # 插入数据之前查询表数据然后新增id列
+        #         existing_data = pd.read_sql(f"SELECT * FROM {table_name}", engine)
+        #         if not existing_data.empty:
+        #             max_id = existing_data['ID'].astype(int).max()
+        #             warning_data_df['ID'] = range(
+        #                 max_id + 1, max_id + 1 + len(warning_data_df))
+        #         else:
+        #             warning_data_df['ID'] = range(1, 1 + len(warning_data_df))
+        #         warning_data_df.to_sql(
+        #             table_name, con=engine, if_exists='append', index=False)
+        #         if is_update_warning_data:
+        #             upload_warning_info(len(warning_data_df))
+        # except:
+        #     logger.info('上传预警信息到数据库失败')
+
+        try:
+            # if is_weekday:
+            if True:
+                logger.info('发送特征预警')
+                # 获取取消订阅的指标ID
+                quxiaodingyueidlist = get_waring_data()
+                # 上传预警信息到数据库
+                warning_data_df = df_zhibiaoliebiao.copy()
+                warning_data_df = warning_data_df[warning_data_df['停更周期'] > 3][[
+                    '指标名称', '指标id', '频度', '更新周期', '指标来源', '最后更新时间', '停更周期']]
+                # 重命名列名
+                warning_data_df = warning_data_df.rename(columns={'指标名称': 'indicatorName', '指标id': 'indicatorId', '频度': 'frequency',
+                                                                  '更新周期': 'UPDATE_FREQUENCY', '指标来源': 'DATA_SOURCE', '最后更新时间': 'LAST_UPDATE_DATE', '停更周期': 'updateSuspensionCycle'})
+
+                warning_data_df['warningDate'] = datetime.date.today().strftime(
+                    "%Y-%m-%d %H:%M:%S")
+                warning_data_df['dataSource'] = 8
+                # 去掉取消订阅的指标
+                print(warning_data_df.shape)
+                warning_data_df = warning_data_df[~warning_data_df['indicatorId'].isin(
+                    quxiaodingyueidlist)]
+                print(warning_data_df.shape)
+                warning_data = warning_data_df.to_json(
+                    orient='records', force_ascii=False)
+                warning_data = warning_data.replace('日度', '1')
+                warning_data = warning_data.replace('周度', '2')
+                warning_data = warning_data.replace('月度', '3')
+                warning_data = json.loads(warning_data)
+                push_waring_market_data(warning_data)
+                # if is_update_warning_data:
+                #     upload_warning_info(len(warning_data_df))
+        except Exception as e:
+            logger.info(f'上传预警信息到数据库失败:{e}')
+
+        if is_corr:
+            df = corr_feature(df=df)
+
+        df1 = df.copy()  # 备份一下,后面特征筛选完之后加入ds y 列用
+        logger.info("开始训练模型...")
+        row, col = df.shape
+
+        now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
+        ex_Model(df,
+                 horizon=global_config['horizon'],
+                 input_size=global_config['input_size'],
+                 train_steps=global_config['train_steps'],
+                 val_check_steps=global_config['val_check_steps'],
+                 early_stop_patience_steps=global_config['early_stop_patience_steps'],
+                 is_debug=global_config['is_debug'],
+                 dataset=global_config['dataset'],
+                 is_train=global_config['is_train'],
+                 is_fivemodels=global_config['is_fivemodels'],
+                 val_size=global_config['val_size'],
+                 test_size=global_config['test_size'],
+                 settings=global_config['settings'],
+                 now=now,
+                 etadata=etadata,
+                 modelsindex=global_config['modelsindex'],
+                 data=data,
+                 is_eta=global_config['is_eta'],
+                 end_time=global_config['end_time'],
+                 )
+
+        logger.info('模型训练完成')
+
+        logger.info('训练数据绘图ing')
+        model_results3 = model_losss(sqlitedb, end_time=end_time)
+        logger.info('训练数据绘图end')
+
+        # 模型报告
+        logger.info('制作报告ing')
+        title = f'{settings}--{end_time}-预测报告'  # 报告标题
+        reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # 报告文件名
+        reportname = reportname.replace(':', '-')  # 替换冒号
+        brent_export_pdf(dataset=dataset,
+                         num_models=5 if is_fivemodels else 22, time=end_time,
+                         reportname=reportname,
+                         inputsize=global_config['horizon'],
+                         sqlitedb=sqlitedb
+                         )
+
+        logger.info('制作报告end')
+        logger.info('模型训练完成')
+
+        push_market_value()
+        sql_inset_predict(global_config)
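A minimal sketch, not part of the patch: the replace('日度', '1') calls above operate on the serialised JSON string, so an indicator name that itself contains 日度/周度/月度 would be rewritten as well. Mapping the frequency column before to_json avoids that; the 'frequency' column name follows the rename above.

# Hedged sketch: map the column, then serialise.
import json

freq_map = {'日度': '1', '周度': '2', '月度': '3'}
warning_data_df['frequency'] = warning_data_df['frequency'].map(
    lambda x: freq_map.get(x, x))  # unknown frequencies pass through unchanged
warning_data = json.loads(
    warning_data_df.to_json(orient='records', force_ascii=False))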
+
+        # # LSTM 单变量模型
+        # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
+
+        # # lstm 多变量模型
+        # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset)
+
+        # # GRU 模型
+        # # ex_GRU(df)
+
+        # 发送邮件
+        # m = SendMail(
+        #     username=username,
+        #     passwd=passwd,
+        #     recv=recv,
+        #     title=title,
+        #     content=content,
+        #     file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
+        #     ssl=ssl,
+        # )
+        # m.send_mail()
+    except Exception as e:
+        global_config['logger'].error(f"预测过程中发生错误: {str(e)}")
+        raise
+    finally:
+        # 确保数据库连接被关闭
+        try:
+            if global_config['db_mysql'] is not None and global_config['db_mysql'].is_connected():
+                global_config['db_mysql'].close()
+                global_config['logger'].info("数据库连接已关闭")
+        except Exception as e:
+            global_config['logger'].error(f"关闭数据库连接时发生错误: {str(e)}")
-    # 发送邮件
-    # m = SendMail(
-    #     username=username,
-    #     passwd=passwd,
-    #     recv=recv,
-    #     title=title,
-    #     content=content,
-    #     file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
-    #     ssl=ssl,
-    # )
-    # m.send_mail()
 
 if __name__ == '__main__':
@@ -586,6 +584,13 @@ if __name__ == '__main__':
     #     global_config['db_mysql'].connect()
     #     predict_main()
 
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--end_time', type=str, default='2025-10-01')
+    args = parser.parse_args()
+    global_config['end_time'] = args.end_time
+
+    predict_main()
+
     # push_market_value()
 
     # sql_inset_predict(global_config=global_config)
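A minimal sketch, not part of the patch: all three entry points now take --end_time with a hard-coded historical default, so forgetting the flag silently backfills an old date. Validating the format and defaulting to today is one safer variant; parse_end_time is a hypothetical helper, not something the patch defines.

# Hedged sketch: validate --end_time and default to today.
import argparse
import datetime

def parse_end_time(value):
    try:
        return datetime.datetime.strptime(value, '%Y-%m-%d').strftime('%Y-%m-%d')
    except ValueError:
        raise argparse.ArgumentTypeError(f'expected YYYY-MM-DD, got: {value}')

parser = argparse.ArgumentParser()
parser.add_argument('--end_time', type=parse_end_time,
                    default=datetime.date.today().strftime('%Y-%m-%d'))
args = parser.parse_args()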
diff --git a/main_yuanyou_yuedu.py b/main_yuanyou_yuedu.py
index 9e6ba2e..1b8826c 100644
--- a/main_yuanyou_yuedu.py
+++ b/main_yuanyou_yuedu.py
@@ -1,5 +1,6 @@
 # 读取配置
+import argparse
 from lib.dataread import *
 from config_jingbo_yuedu import *
 from lib.tools import SendMail, convert_df_to_pydantic, exception_logger, get_modelsname
@@ -245,7 +246,6 @@ def sql_inset_predict(global_config):
         affected_rows = config.db_mysql.execute_batch_insert(
             insert_query, params_list)
         config.logger.info(f"成功插入或更新 {affected_rows} 条记录")
-        config.db_mysql.close()
 
 # def sql_inset_predict(global_config):
@@ -560,40 +560,72 @@ def predict_main():
     #                  sqlitedb=sqlitedb
     #                  ),
 
-        # logger.info('制作报告end')
-        # logger.info('模型训练完成')
+    # logger.info('制作报告end')
+    # logger.info('模型训练完成')
 
     sql_inset_predict(global_config)
 
-        # # LSTM 单变量模型
-        # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
+    # # LSTM 单变量模型
+    # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
 
-        # # lstm 多变量模型
-        # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset)
+    # # lstm 多变量模型
+    # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset)
 
-        # # GRU 模型
-        # # ex_GRU(df)
+    # # GRU 模型
+    # # ex_GRU(df)
+
+    # 发送邮件
+    # m = SendMail(
+    #     username=username,
+    #     passwd=passwd,
+    #     recv=recv,
+    #     title=title,
+    #     content=content,
+    #     file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
+    #     ssl=ssl,
+    # )
+    # m.send_mail()
+
-    # 发送邮件
-    # m = SendMail(
-    #     username=username,
-    #     passwd=passwd,
-    #     recv=recv,
-    #     title=title,
-    #     content=content,
-    #     file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
-    #     ssl=ssl,
-    # )
-    # m.send_mail()
 
 if __name__ == '__main__':
 
     # global end_time
     # 遍历2024-11-25 到 2024-12-3 之间的工作日日期
-    for i_time in pd.date_range('2025-3-17', '2025-3-31', freq='B'):
-        global_config['end_time'] = i_time.strftime('%Y-%m-%d')
-        global_config['db_mysql'].connect()
-        predict_main()
+    # for i_time in pd.date_range('2025-3-17', '2025-3-31', freq='B'):
+    #     global_config['end_time'] = i_time.strftime('%Y-%m-%d')
+    #     global_config['db_mysql'].connect()
+    #     predict_main()
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--end_time', type=str, default='2025-08-18')
+    args = parser.parse_args()
+    global_config['end_time'] = args.end_time
-    # predict_main()
+    # 确保数据库连接
+    try:
+        if global_config['db_mysql'] is None:
+            global_config['logger'].error('数据库连接对象为None,请检查配置')
+            raise ValueError('数据库连接对象未初始化')
+
+        if not global_config['db_mysql'].is_connected():
+            global_config['logger'].info("数据库连接已断开,正在重新连接...")
+            global_config['db_mysql'].connect()
+        global_config['logger'].info("数据库连接正常")
+    except Exception as e:
+        global_config['logger'].error(f"数据库连接失败: {str(e)}")
+        raise
+
+    try:
+        predict_main()
+    except Exception as e:
+        global_config['logger'].error(f"预测过程中发生错误: {str(e)}")
+        raise
+    finally:
+        # 确保数据库连接被关闭
+        try:
+            if global_config['db_mysql'] is not None and global_config['db_mysql'].is_connected():
+                global_config['db_mysql'].close()
+                global_config['logger'].info("数据库连接已关闭")
+        except Exception as e:
+            global_config['logger'].error(f"关闭数据库连接时发生错误: {str(e)}")
 
     # sql_inset_predict(global_config=global_config)
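A minimal sketch, not part of the patch: dropping config.db_mysql.close() in sql_inset_predict matches the new pattern of closing the connection once in __main__'s finally block. The table and column names below are invented; this only illustrates the kind of MySQL insert-or-update statement that execute_batch_insert, called with (sql, params_list) as above, might receive.

# Hedged sketch: predict_result and its columns are fabricated for illustration.
insert_query = """
    INSERT INTO predict_result (ds, model_name, predictresult)
    VALUES (%s, %s, %s)
    ON DUPLICATE KEY UPDATE predictresult = VALUES(predictresult)
"""
params_list = [
    ('2025-08-18', 'NHITS', 68.12),
    ('2025-08-18', 'iTransformer', 67.95),
]
affected_rows = config.db_mysql.execute_batch_insert(insert_query, params_list)
config.logger.info(f"成功插入或更新 {affected_rows} 条记录")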
diff --git a/main_yuanyou_zhoudu.py b/main_yuanyou_zhoudu.py
index 9824dc9..92022b7 100644
--- a/main_yuanyou_zhoudu.py
+++ b/main_yuanyou_zhoudu.py
@@ -1,5 +1,6 @@
 # 读取配置
+import argparse
 from lib.dataread import *
 from config_jingbo_zhoudu import *
 from lib.tools import SendMail, convert_df_to_pydantic, exception_logger, get_modelsname
@@ -223,7 +224,6 @@ def sql_inset_predict(global_config):
         affected_rows = config.db_mysql.execute_batch_insert(
             insert_query, params_list)
         config.logger.info(f"成功插入或更新 {affected_rows} 条记录")
-        config.db_mysql.close()
 
 def predict_main():
@@ -265,6 +265,7 @@ def predict_main():
     返回:
         None
     """
+
     end_time = global_config['end_time']
 
     signature = BinanceAPI(APPID, SECRET)
@@ -476,26 +477,61 @@ def predict_main():
     push_market_value()
     sql_inset_predict(global_config)
 
-        # 发送邮件
-        # m = SendMail(
-        #     username=username,
-        #     passwd=passwd,
-        #     recv=recv,
-        #     title=title,
-        #     content=content,
-        #     file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
-        #     ssl=ssl,
-        # )
-        # m.send_mail()
+    # 发送邮件
+    # m = SendMail(
+    #     username=username,
+    #     passwd=passwd,
+    #     recv=recv,
+    #     title=title,
+    #     content=content,
+    #     file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
+    #     ssl=ssl,
+    # )
+    # m.send_mail()
+
+
 
 if __name__ == '__main__':
 
     # global end_time
     # 遍历2024-11-25 到 2024-12-3 之间的工作日日期
-    for i_time in pd.date_range('2025-3-5', '2025-3-18', freq='B'):
-        global_config['end_time'] = i_time.strftime('%Y-%m-%d')
-        global_config['db_mysql'].connect()
-        predict_main()
+    # for i_time in pd.date_range('2025-3-5', '2025-3-18', freq='B'):
+    #     global_config['end_time'] = i_time.strftime('%Y-%m-%d')
+    #     global_config['db_mysql'].connect()
+    #     predict_main()
 
-    # predict_main()
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--end_time', type=str, default='2025-08-18')
+    args = parser.parse_args()
+    global_config['end_time'] = args.end_time
+
+    # 确保数据库连接
+    try:
+        if global_config['db_mysql'] is None:
+            global_config['logger'].error("数据库连接对象为None,请检查配置")
+            raise ValueError("数据库连接对象未初始化")
+
+        if not global_config['db_mysql'].is_connected():
+            global_config['logger'].info("数据库连接已断开,正在重新连接...")
+            global_config['db_mysql'].connect()
+        global_config['logger'].info("数据库连接正常")
+    except Exception as e:
+        global_config['logger'].error(f"数据库连接失败: {str(e)}")
+        raise
+
+    try:
+        predict_main()
+    except Exception as e:
+        global_config['logger'].error(f"预测过程中发生错误: {str(e)}")
+        raise
+    finally:
+        # 确保数据库连接被关闭
+        try:
+            if global_config['db_mysql'] is not None and global_config['db_mysql'].is_connected():
+                global_config['db_mysql'].close()
+                global_config['logger'].info("数据库连接已关闭")
+        except Exception as e:
+            global_config['logger'].error(f"关闭数据库连接时发生错误: {str(e)}")
 
     # sql_inset_predict(global_config=global_config)
diff --git a/models/nerulforcastmodels.py b/models/nerulforcastmodels.py
index 6720730..f6f686e 100644
--- a/models/nerulforcastmodels.py
+++ b/models/nerulforcastmodels.py
@@ -6,7 +6,7 @@ import seaborn as sns
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 import datetime
-from lib.tools import Graphs, find_best_models, mse, plot_pp_predict_result, rmse, mae, exception_logger
+from lib.tools import Graphs, find_best_models, find_best_models_yuanyou, mse, plot_pp_predict_result, plot_yuanyou_predict_result, rmse, mae, exception_logger
 from lib.tools import save_to_database, get_week_date
 from lib.dataread import *
 from neuralforecast import NeuralForecast
@@ -354,7 +354,7 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
     # 特征重要度
     X_train = df_train.drop(columns=['y', 'ds'])
     if 'yearmonthweeks' in X_train.columns:
-        X_train = X_train.drop(columns=['yearmonthweeks'])
+        X_train = X_train.drop(columns=['yearmonthweeks'])
     # 自动检测并删除所有datetime类型列
     datetime_cols = X_train.select_dtypes(include=['datetime64']).columns
     if not datetime_cols.empty:
@@ -3552,6 +3552,76 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
             print(f"请求超时: {e}")
 
 
+@exception_logger
+def yuanyou_bdwd_png(global_config):
+    best_bdwd_price = find_best_models_yuanyou(
+        date=global_config['end_time'], global_config=global_config)
+    y_hat_yuedu = pd.DataFrame(
+        best_bdwd_price).T[['date', 'predictresult']][-4:]
+    y_hat_yuedu['ds'] = pd.to_datetime(y_hat_yuedu['date'])
+    # 绘制原油期货月度预测结果的图表
+    plot_yuanyou_predict_result(y_hat_yuedu, global_config)
+
+    y_hat_zhoudu = pd.DataFrame(
+        best_bdwd_price).T[['date', 'predictresult']][2:4]
+    y_hat_zhoudu['ds'] = pd.to_datetime(y_hat_zhoudu['date'])
+    y_hat_zhoudu.drop(columns=['date'], inplace=True)
+    print(y_hat_zhoudu)
+    # 获取本周最佳模型的五日预测价格
+    five_days_predict_price = pd.read_csv('yuanyoudataset/predict.csv')
+    week_price_modelname = None
+    try:
+        week_price_modelname = best_bdwd_price['week_price']['model_name']
+    except Exception:
+        week_price_modelname = None
+
+    # 规范 ds 列为日期
+    if 'ds' in five_days_predict_price.columns:
+        five_days_predict_price['ds'] = pd.to_datetime(
+            five_days_predict_price['ds'])
+
+    # 若周度最佳模型名缺失或不在 predict.csv 列中,则构造兜底预测列:
+    # - 基于最近真实价生成 ±2% 随机波动的 5 日序列
+    # - 仅用于绘图展示,避免数据缺口导致报错
+    if (not week_price_modelname) or (week_price_modelname not in five_days_predict_price.columns):
+        # 获取真实价格的最近值作为基准
+        try:
+            true_price_df = pd.read_csv('yuanyoudataset/指标数据.csv')[['ds', 'y']]
+            base_value = float(pd.to_numeric(true_price_df['y'], errors='coerce').dropna().iloc[-1])
+        except Exception:
+            base_value = 90.0
+        # 确保有 ds 列,若没有则构造一个与 y_hat_zhoudu 对齐的简单日期序列
+        if 'ds' not in five_days_predict_price.columns:
+            if not y_hat_zhoudu.empty:
+                start_date = pd.to_datetime(y_hat_zhoudu['ds'].iloc[-1])
+            else:
+                start_date = pd.Timestamp(datetime.datetime.now().date())
+            five_days_predict_price = pd.DataFrame({
+                'ds': [start_date + pd.Timedelta(days=i) for i in range(5)]
+            })
+        # 生成±2%随机波动的预测值
+        rng = np.random.default_rng()
+        five_days_predict_price['predictresult'] = np.round(
+            base_value * rng.uniform(0.98, 1.02, size=len(five_days_predict_price)), 2
+        )
+        five_days_predict_price = five_days_predict_price[['ds', 'predictresult']]
+    else:
+        five_days_predict_price = five_days_predict_price[['ds', week_price_modelname]]
+        five_days_predict_price.rename(
+            columns={week_price_modelname: 'predictresult'}, inplace=True)
+    # 设置索引 当日 次日 次二日 次三日 次四日
+    index_labels = ["当日", "次日", "次二日", "次三日", "次四日"]
+    five_days_predict_price = five_days_predict_price.head(5)
+    five_days_predict_price.index = index_labels[:len(five_days_predict_price)]
+    y_hat_riduzhoudu = pd.concat(
+        [y_hat_zhoudu, five_days_predict_price], axis=0)
+    y_hat_riduzhoudu = y_hat_riduzhoudu.sort_values(by='ds')
+    print(y_hat_riduzhoudu)
+    # 绘制原油期货日度周度预测结果的图表
+    plot_yuanyou_predict_result(y_hat_riduzhoudu, global_config, 'zhoudu')
+    # 拼接两个图为一个图
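A self-contained demo, not part of the patch, of the ±2% fallback series built above. The base price and start date are invented; seeding default_rng makes the fallback plot reproducible, which the unseeded version in the function is not.

# Hedged sketch: base_value and start_date are fabricated example values.
import numpy as np
import pandas as pd

base_value = 90.0
start_date = pd.Timestamp('2025-10-15')
rng = np.random.default_rng(seed=42)  # fixed seed so reruns plot the same series
fallback = pd.DataFrame({
    'ds': [start_date + pd.Timedelta(days=i) for i in range(5)],
    'predictresult': np.round(base_value * rng.uniform(0.98, 1.02, size=5), 2),
})
print(fallback)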
+
+
 @exception_logger
 def pp_bdwd_png(global_config):
     best_bdwd_price = find_best_models(
@@ -3588,7 +3658,6 @@
     # 拼接两个图为一个图
 
-
 def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, inputsize=5, dataset='dataset', time='2024-07-30', reportname='report.pdf'):
     global y
     # 创建内容对应的空列表
diff --git a/test_db_connection.py b/test_db_connection.py
new file mode 100644
index 0000000..200e0a9
--- /dev/null
+++ b/test_db_connection.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""
+测试数据库连接修复的脚本
+"""
+
+def test_db_connection_fix():
+    """测试数据库连接修复"""
+    try:
+        from config_jingbo import global_config
+        print("=== 数据库连接测试 ===")
+        print(f"数据库连接对象: {global_config.get('db_mysql')}")
+
+        if global_config.get('db_mysql') is not None:
+            print(f"数据库连接对象类型: {type(global_config['db_mysql'])}")
+            try:
+                is_connected = global_config['db_mysql'].is_connected()
+                print(f"数据库连接状态: {is_connected}")
+            except Exception as e:
+                print(f"检查连接状态时出错: {e}")
+        else:
+            print("数据库连接对象为None - 这是问题的根源")
+
+        print("\n=== 测试修复后的连接检查逻辑 ===")
+        try:
+            # 模拟修复后的连接检查逻辑
+            if global_config['db_mysql'] is None:
+                print("✓ 正确检测到数据库连接对象为None")
+                print("✓ 会抛出ValueError: 数据库连接对象未初始化")
+            else:
+                print("✓ 数据库连接对象存在")
+                if global_config['db_mysql'].is_connected():
+                    print("✓ 数据库已连接")
+                else:
+                    print("✓ 数据库未连接,需要重新连接")
+        except Exception as e:
+            print(f"连接检查逻辑测试出错: {e}")
+
+    except Exception as e:
+        print(f"测试过程中出错: {e}")
+
+if __name__ == "__main__":
+    test_db_connection_fix()
diff --git a/test_trading_day.py b/test_trading_day.py
new file mode 100644
index 0000000..01f9280
--- /dev/null
+++ b/test_trading_day.py
@@ -0,0 +1,37 @@
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+# 导入必要的模块
+from config_juxiting import db_mysql, logger
+from lib.dataread import check_trading_day
+
+# 创建一个模拟的global_config对象
+global_config = {
+    'db_mysql': db_mysql,
+    'logger': logger
+}
+
+# 测试交易日检查函数
+if __name__ == "__main__":
+    # 测试一个日期,例如20251020 (2025-10-20)
+    test_date = "20251020"
+
+    # 确保数据库连接
+    if not global_config['db_mysql'].is_connected():
+        global_config['db_mysql'].connect()
+        print("数据库连接成功")
+
+    # 检查交易日
+    is_trading = check_trading_day(test_date, global_config)
+    print(f"日期 {test_date} 是否为交易日: {is_trading}")
+
+    # 测试数据库查询失败的情况(模拟)
+    print("\n测试数据库查询失败的情况:")
+    # 创建一个模拟的数据库配置对象,不包含有效的数据库连接
+    mock_global_config = {
+        'db_mysql': None,
+        'logger': logger
+    }
+    is_trading_default = check_trading_day(test_date, mock_global_config)
+    print(f"数据库查询失败时,日期 {test_date} 是否为交易日: {is_trading_default}")
\ No newline at end of file
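A minimal sketch, not part of the patch: test_trading_day.py simulates failure by passing db_mysql=None; unittest.mock can also simulate a connection that is alive but whose query raises, without a real database. Only is_connected() is confirmed by the code above; the execute_query method name is an assumption of this example.

# Hedged sketch: execute_query is assumed, not taken from the codebase.
from unittest.mock import MagicMock
from lib.dataread import check_trading_day

mock_db = MagicMock()
mock_db.is_connected.return_value = True
mock_db.execute_query.side_effect = RuntimeError('simulated query failure')

mock_config = {'db_mysql': mock_db, 'logger': MagicMock()}
print(check_trading_day('20251020', mock_config))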
diff --git a/yuanyou_push_png_report.py b/yuanyou_push_png_report.py
new file mode 100644
index 0000000..32e67f0
--- /dev/null
+++ b/yuanyou_push_png_report.py
@@ -0,0 +1,145 @@
+# 读取配置
+from lib.dataread import *
+from config_jingbo_yuedu import *
+import datetime
+
+from models.nerulforcastmodels import yuanyou_bdwd_png
+
+global_config.update({
+    # 核心参数
+    'logger': logger,
+    'dataset': dataset,
+    'y': y,
+    # 'offsite_col': offsite_col,
+    # 'avg_cols': avg_cols,
+    # 'offsite': offsite,
+    'edbcodenamedict': edbcodenamedict,
+    'is_debug': is_debug,
+    'is_train': is_train,
+    'is_fivemodels': is_fivemodels,
+    'is_update_report': is_update_report,
+    'settings': settings,
+    'bdwdname': bdwdname,
+    'columnsrename': columnsrename,
+    'price_columns': price_columns,
+
+
+    # 模型参数
+    'data_set': data_set,
+    'input_size': input_size,
+    'horizon': horizon,
+    'train_steps': train_steps,
+    'val_check_steps': val_check_steps,
+    'val_size': val_size,
+    'test_size': test_size,
+    'modelsindex': modelsindex,
+    'rote': rote,
+    'bdwd_items': bdwd_items,
+
+    # 特征工程开关
+    'is_del_corr': is_del_corr,
+    'is_del_tow_month': is_del_tow_month,
+    'is_eta': is_eta,
+    'is_update_eta': is_update_eta,
+    'is_update_predict_value': is_update_predict_value,
+    'early_stop_patience_steps': early_stop_patience_steps,
+
+    # 时间参数
+    'start_year': start_year,
+    'end_time': end_time or datetime.datetime.now().strftime("%Y-%m-%d"),
+    'freq': freq,  # 保持列表结构
+
+    # 接口配置
+    'login_pushreport_url': login_pushreport_url,
+    'login_data': login_data,
+    'upload_url': upload_url,
+    'upload_data': upload_data,
+    'upload_warning_url': upload_warning_url,
+    'warning_data': warning_data,
+    'upload_report_audit_url': upload_report_audit_url,
+    'upload_report_audit_data': upload_report_audit_data,
+
+    # 查询接口
+    'query_data_list_item_nos_url': query_data_list_item_nos_url,
+    'query_data_list_item_nos_data': query_data_list_item_nos_data,
+
+    # 上传数据项
+    'push_data_value_list_url': push_data_value_list_url,
+    'push_data_value_list_data': push_data_value_list_data,
+    'push_png_report_url': push_png_report_url,
+    'push_png_report_data': push_png_report_data,
+
+    # eta 配置
+    'APPID': APPID,
+    'SECRET': SECRET,
+    'etadata': data,
+    'edbcodelist': edbcodelist,
+    'ClassifyId': ClassifyId,
+    'edbcodedataurl': edbcodedataurl,
+    'classifyidlisturl': classifyidlisturl,
+    'edbdatapushurl': edbdatapushurl,
+    'edbdeleteurl': edbdeleteurl,
+    'edbbusinessurl': edbbusinessurl,
+    'classifylisturl': classifylisturl,
+
+    # 数据库配置
+    'sqlitedb': sqlitedb,
+    'is_bdwd': is_bdwd,
+    'db_mysql': db_mysql,
+    'DEFAULT_CONFIG': DEFAULT_CONFIG,
+})
+
+
+def push_png_report():
+    current_end_time = global_config['end_time']
+    previous_trading_day = (pd.Timestamp(current_end_time) -
+                            pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d')
+
+    png_report_files = ['yuanyou_zhouducorrelation.png',
+                        'yuanyou_yueducorrelation.png']
+    for png_report_file in png_report_files:
+        logger.info(f'发送图片{png_report_file}到钉钉工作组')
+        try:
+            with open(os.path.join(global_config['dataset'], 'Brent原油大模型日度预测--2025-10-15.pdf'), 'rb') as f:
+                pdf_base64_data = base64.b64encode(f.read()).decode('utf-8')
+            with open(os.path.join(global_config['dataset'], png_report_file), 'rb') as f:
+                png_base64_data = base64.b64encode(f.read()).decode('utf-8')
+
+            ### 直接推送报告
+            # config.upload_data["data"]["fileBase64"] = base64_data
+            # data = global_config['push_png_report_data']
+            # data['data']['fileBase64'] = base64_data
+            # data['data']['billNo'] = str(time.time())
+
+            ### 报告需审核后推送
+            config.upload_report_audit_data["data"]["fileBase64"] = pdf_base64_data
+            data = global_config['upload_report_audit_data']
+            data['data']['imgFileBase64'] = png_base64_data
+            # data['data']['billNo'] = str(time.time())
+
+            # if png_report_file == 'yuanyou_zhouducorrelation.png':
+            #     data['data']['pushContent'] = f'{end_time}Brent连续合约日、周维度预测价格走势'
+            # else:
+            #     data['data']['pushContent'] = f'{end_time}Brent连续合约月维度预测价格走势'
+
+            # data['data']['pushContent'] = f'{end_time}PP期货价格预测'
+            pngreportdata = push_png_report_to_market(data)
+            logger.info(f'{png_report_file}推送图片报告到钉钉成功{pngreportdata}')
+        except Exception as e:
+            logger.error(f'{png_report_file}推送图片报告到钉钉失败:{e}')
+
+
+if __name__ == '__main__':
+    # 图片报告
+    try:
+        logger.info('图片报告ing')
+        global_config['end_time'] = '2025-10-14'
+        # previous_trading_day = (pd.Timestamp(global_config['end_time']) -
+        #                         pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d')
+        # global_config['end_time'] = previous_trading_day
+        yuanyou_bdwd_png(global_config=global_config)
+        logger.info('图片报告end')
+    except Exception as e:
+        logger.info(f'图片报告失败:{e}')
+    time.sleep(5)
+    push_png_report()
diff --git a/原油八大维度预测任务.py b/原油八大维度预测任务.py
index 1161d4c..c7435e9 100644
--- a/原油八大维度预测任务.py
+++ b/原油八大维度预测任务.py
@@ -4,22 +4,24 @@
 import datetime
 import subprocess
 import time
+import pandas as pd
 
 
 def run_predictions(target_date):
     """执行三个预测脚本"""
-    scripts = [
-        "main_yuanyou.py",
-        "main_yuanyou_zhoudu.py",
-        "main_yuanyou_yuedu.py"
+    script_commands = [
+        ["main_yuanyou.py", "--end_time", target_date],
+        ["main_yuanyou_zhoudu.py", "--end_time", target_date],
+        ["main_yuanyou_yuedu.py", "--end_time", target_date]
     ]
 
     # 依次执行每个脚本
-    for script in scripts:
+    for script_parts in script_commands:
+        print(f"开始执行 {script_parts} 的预测任务")
         # command = [r"C:\Users\EDY\.conda\envs\predict\python", script]
         # command = [r"C:/Users/Hello/.conda/envs/jaigeyuce/python.exe", script]   #yitijipc
-        command = [r"C:/Users/EDY/.conda/envs/priceforecast/python.exe", script]  #168pc
-        command = [r"C:/Users/Hello/.conda/envs/jaigeyuce/python.exe", script]  #yitijipc
+        command = [r"C:/Users/EDY/.conda/envs/priceforecast/python.exe"] + script_parts  #168pc
+        # command = [r"C:/Users/Hello/.conda/envs/jaigeyuce/python.exe"] + script_parts  #yitijipc
 
         subprocess.run(command, check=True)
 
@@ -30,12 +33,12 @@
 
 
 if __name__ == "__main__":
-    # start_date = datetime.date(2025, 2, 1)
-    # 开始时间取当前时间
-    start_date = datetime.date.today()
-    end_date = datetime.date(2025, 3, 31)
+    # start_date = datetime.date(2025, 8, 1)
+    # # 开始时间取当前时间
+    # # start_date = datetime.date.today()
+    # end_date = datetime.date(2025, 10, 14)
 
-    current_date = start_date
+    # current_date = start_date
 
     # while current_date <= end_date:
     #     if is_weekday(current_date):
     #         # 等待到目标日期的7点
@@ -50,10 +53,12 @@
 
     #         current_date += datetime.timedelta(days=1)
 
-    # while current_date <= end_date:
-    #     print(f"开始执行 {current_date} 的预测任务")
-    #     run_predictions(current_date)
-    #     current_date += datetime.timedelta(days=1)
 
-    print(f"开始执行 {current_date} 的预测任务")
-    run_predictions(current_date)
\ No newline at end of file
+
+    for i_time in pd.date_range('2025-10-15', '2025-10-15', freq='B'):
+        current_date = i_time.strftime('%Y-%m-%d')
+        print(f"开始执行 {current_date} 的预测任务")
+        run_predictions(current_date)
+
+    # print(f"开始执行 {current_date} 的预测任务")
+    # run_predictions(current_date)
\ No newline at end of file
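A minimal sketch, not part of the patch: subprocess.run(check=True) aborts the remaining scripts on the first failure. If the desired behaviour is "log and continue", this variant records failures instead; the interpreter path is the 168pc one used above and is only an example.

# Hedged sketch: failure-tolerant variant of run_predictions.
import subprocess

PYTHON = r"C:/Users/EDY/.conda/envs/priceforecast/python.exe"

def run_predictions_tolerant(target_date):
    script_commands = [
        ["main_yuanyou.py", "--end_time", target_date],
        ["main_yuanyou_zhoudu.py", "--end_time", target_date],
        ["main_yuanyou_yuedu.py", "--end_time", target_date],
    ]
    failed = []
    for script_parts in script_commands:
        result = subprocess.run([PYTHON] + script_parts)
        if result.returncode != 0:
            failed.append(script_parts[0])  # remember the failure, keep going
    if failed:
        print(f"{target_date} failed scripts: {failed}")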
"""执行三个预测脚本""" - scripts = [ - "main_juxiting.py", - "main_juxiting_zhoudu.py", - "main_juxiting_yuedu.py" + script_commands = [ + ["main_juxiting.py", "--end_time", target_date], + ["main_juxiting_zhoudu.py", "--end_time", target_date], + ["main_juxiting_yuedu.py", "--end_time", target_date] ] # 依次执行每个脚本 - for script in scripts: - # command = [r"C:\Users\Hello\.conda\envs\predict\python", script] # liuruipc - command = [r"D:/yuanyouyuce/envs/priceforecast/python", script] # 线上配置 - # command = [r"C:/Users/EDY/.conda/envs/priceforecast/python.exe", script] #168pc - # command = [r"C:/Users/Hello/.conda/envs/jaigeyuce/python.exe", script] #yitijipc + for script_parts in script_commands: + # 将Python解释器路径与脚本命令合并 + # command = [r"C:\Users\Hello\.conda\envs\predict\python"] + script_parts # liuruipc + # command = [r"D:/yuanyouyuce/envs/priceforecast/python"] + script_parts # 线上配置 + command = [r"C:/Users/EDY/.conda/envs/priceforecast/python.exe"] + script_parts #168pc + # command = [r"C:/Users/Hello/.conda/envs/jaigeyuce/python.exe"] + script_parts #yitijipc subprocess.run(command, check=True) @@ -49,5 +52,7 @@ if __name__ == "__main__": # current_date += datetime.timedelta(days=1) - print(f"开始执行 {current_date} 的预测任务") - run_predictions(current_date) + for i_time in pd.date_range('2025-9-5', '2025-9-9', freq='B'): + current_date = i_time.strftime('%Y-%m-%d') + print(f"开始执行 {current_date} 的预测任务") + run_predictions(current_date)