diff --git a/main_juxiting.py b/main_juxiting.py
index 73d6304..7b3a525 100644
--- a/main_juxiting.py
+++ b/main_juxiting.py
@@ -1,7 +1,7 @@
 # 读取配置
 from lib.dataread import *
 from lib.tools import SendMail,exception_logger
-from models.nerulforcastmodels import ex_Model,model_losss,model_losss_juxiting,brent_export_pdf,tansuanli_export_pdf,pp_export_pdf,model_losss_juxiting
+from models.nerulforcastmodels import ex_Model_Juxiting,model_losss,model_losss_juxiting,brent_export_pdf,tansuanli_export_pdf,pp_export_pdf
 import glob
 import torch
 
@@ -226,7 +226,7 @@ def predict_main():
     row, col = df.shape
 
     now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
-    ex_Model(df,
+    ex_Model_Juxiting(df,
             horizon=horizon,
             input_size=input_size,
             train_steps=train_steps,
diff --git a/models/nerulforcastmodels.py b/models/nerulforcastmodels.py
index e55fbce..1356506 100644
--- a/models/nerulforcastmodels.py
+++ b/models/nerulforcastmodels.py
@@ -228,6 +228,197 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien
     # return nf_test_preds
     return
 
+@exception_logger
+def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, early_stop_patience_steps,
+                      is_debug, dataset, is_train, is_fivemodels, val_size, test_size, settings, now,
+                      etadata, modelsindex, data, is_eta, end_time):
+    '''
+    Model training and forecasting.
+    :param df: dataset
+    horizon                    # forecast horizon (number of steps ahead)
+    input_size                 # length of the input sequence
+    train_steps                # training steps, used to cap the number of epochs
+    val_check_steps            # validation check frequency
+    early_stop_patience_steps  # early-stopping patience steps
+    :return: forecast results
+    '''
+
+    # column names of the model prediction list
+    # columns2 = [
+    #     'NHITS',
+    #     'Informer',
+    #     'LSTM',
+    #     'iTransformer',
+    #     'TSMixer',
+    #     'TSMixerx',
+    #     'PatchTST',
+    #     'RNN',
+    #     'GRU',
+    #     'TCN',
+    #     # 'DeepAR',
+    #     'DeepAR-median',
+    #     'DeepAR-lo-90',
+    #     'DeepAR-lo-80',
+    #     'DeepAR-hi-80',
+    #     'DeepAR-hi-90',
+    #     'BiTCN',
+    #     'DilatedRNN',
+    #     'MLP',
+    #     'DLinear',
+    #     'NLinear',
+    #     'TFT',
+    #     'FEDformer',
+    #     'StemGNN',
+    #     'MLPMultivariate',
+    #     'TiDE',
+    #     'DeepNPT',
+    # ]
+
+    df = df.replace(',', '', regex=True)
+    df = df.rename(columns={'date': 'ds'})
+    df['y'] = pd.to_numeric(df['y'], errors='coerce')
+    df['ds'] = pd.to_datetime(df['ds'], errors='coerce')  # errors='coerce' turns invalid dates into NaT
+    # cast the integer columns of df to float32
+    for col in df.select_dtypes(include=['int']).columns:
+        df[col] = df[col].astype(np.float32)
+
+    # configure a Chinese font for plots
+    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
+    plt.rcParams['axes.unicode_minus'] = False    # render minus signs correctly
+
+    # use the block below when no feature selection is applied
+    df_reg = df
+    df_reg.sort_values('ds', inplace=True)
+    if is_debug:
+        df_reg = df_reg[-1000:-1]
+
+    # end index of the training set: 90% of the data
+    split_index = int(0.9 * len(df_reg))
+
+    # split into training and test sets in chronological order
+    df_train = df_reg[:split_index]
+    df_test = df_reg[-split_index:]
+    df_train['unique_id'] = 1
+    df_test['unique_id'] = 1
+
+    # show the first rows of the split datasets
+    logger.info("Training set head:")
+    logger.info(df_train.head())
+
+    logger.info("\nTesting set head:")
+    logger.info(df_test.head())
+
+    models = [
+        NHITS(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', activation='ReLU', early_stop_patience_steps=early_stop_patience_steps),
+        Informer(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        LSTM(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        iTransformer(h=horizon, input_size=input_size, n_series=1,
+                     max_steps=train_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        TSMixer(h=horizon, input_size=input_size, n_series=1, max_steps=train_steps, early_stop_patience_steps=early_stop_patience_steps),
+        TSMixerx(h=horizon, input_size=input_size, n_series=1, max_steps=train_steps, early_stop_patience_steps=early_stop_patience_steps),
+        PatchTST(h=horizon, input_size=input_size, max_steps=train_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        RNN(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        GRU(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        TCN(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        # DeepAR(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        BiTCN(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        DilatedRNN(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        MLP(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        DLinear(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        NLinear(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        TFT(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        FEDformer(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        StemGNN(h=horizon, input_size=input_size, n_series=1, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        MLPMultivariate(h=horizon, input_size=input_size, n_series=1, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        TiDE(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+        DeepNPTS(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', early_stop_patience_steps=early_stop_patience_steps),
+
+        # VanillaTransformer(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard'),  # errored
+        # Autoformer(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard'),  # errored
+        # NBEATS(h=horizon, input_size=input_size,
+        #        max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard'),
+        # NBEATSx(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', activation='ReLU'),  # errored
+        # HINT(h=horizon),
+
+    ]
+
+    if is_fivemodels:
+        # load the names of the five best models saved earlier
+        with open(os.path.join(dataset, 'best_modelnames.txt'), 'r', encoding='utf-8') as f:
+            best_modelnames = f.readlines()[0]
+            logger.info(f'获取本地最佳模型名称:{best_modelnames}')
+
+        # rebuild models, keeping only those named in best_modelnames
+        all_models = models
+        models = []
+        for model in all_models:
+            if model._get_name() in best_modelnames:
+                models.append(model)
+
+    # create the NeuralForecast instance and train the models
+    nf = NeuralForecast(models=models, freq=freq)
+
+    from joblib import dump, load
+    if is_train:
+        # cross-validate the models
+        nf_preds = nf.cross_validation(df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
+        nf_preds.to_csv(os.path.join(dataset, "cross_validation.csv"), index=False)
+
+        nf_preds = nf_preds.reset_index()
+        # save the models
+        # build the filename from the settings label and a timestamp
+        filename = f'{settings}--{now}.joblib'
+        # strip colons from the filename
+        filename = filename.replace(':', '-')  # replace colons
+        dump(nf, os.path.join(dataset, filename))
+    else:
+        # use glob to pick the newest joblib file under dataset
+        import glob
+        filename = max(glob.glob(os.path.join(dataset, '*.joblib')), key=os.path.getctime)
+        logger.info('读取模型:' + filename)
+        nf = load(filename)
+        # predict on the test set
+        nf_test_preds = nf.cross_validation(df=df_test, val_size=val_size, test_size=test_size, n_windows=None)
+        # save the test-set predictions
+        nf_test_preds.to_csv(os.path.join(dataset, "cross_validation.csv"), index=False)
+
+    df_test['ds'] = pd.to_datetime(df_test['ds'], errors='coerce')
+
+    # forecast future timestamps
+    df_predict = nf.predict(df_test).reset_index()
+    # drop the index column
+    df_predict.drop(columns=['index'], inplace=True)
+    df_predict = df_predict.astype({col: 'float32' for col in df_predict.columns if col not in ['ds']})
+
+    # add the prediction timestamp
+    df_predict['created_dt'] = end_time
+
+    # save the predicted values
+    df_predict.to_csv(os.path.join(dataset, "predict.csv"), index=False)
+
+    # save the predictions to the database
+    save_to_database(sqlitedb, df_predict, 'predict', end_time)
+
+    # push the predicted values to ETA
+    if is_update_eta:
+        df_predict['ds'] = pd.to_datetime(df_predict['ds'])
+        dates = df_predict['ds'].dt.strftime('%Y-%m-%d')
+
+        for m in modelsindex.keys():
+            data_list = []
+            for date, value in zip(dates, df_predict[m].round(2)):
+                data_list.append({'Date': date, 'Value': value})
+            data['DataList'] = data_list
+            data['IndexCode'] = modelsindex[m]
+            data['IndexName'] = f'价格预测{m}模型'
+            data['Remark'] = m
+            etadata.push_data(data)
+
+
+    # return nf_test_preds
+    return
+
 
 # 雍安环境预测评估指数
 @exception_logger
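
For orientation, below is a minimal usage sketch of the new ex_Model_Juxiting entry point added in models/nerulforcastmodels.py. It is not part of the diff: every concrete value (horizon, window sizes, paths, the modelsindex mapping, the ETA payload) is an illustrative placeholder, and the function also relies on module-level globals such as freq, sqlitedb and is_update_eta that come from the star imports in the real module. The truncated call inside predict_main() in the first hunk remains the authoritative wiring.

# Usage sketch only -- all values below are illustrative placeholders,
# not the project's real configuration.
import datetime
import pandas as pd

from models.nerulforcastmodels import ex_Model_Juxiting

df = pd.read_csv('dataset/data.csv')   # assumed to contain 'date' and 'y' columns
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
end_time = datetime.datetime.now().strftime('%Y-%m-%d')

ex_Model_Juxiting(df,
                  horizon=5,                    # forecast 5 steps ahead (placeholder)
                  input_size=40,                # input window length (placeholder)
                  train_steps=1000,             # cap on training steps (placeholder)
                  val_check_steps=30,           # validation check frequency (placeholder)
                  early_stop_patience_steps=5,  # early-stopping patience (placeholder)
                  is_debug=False,
                  dataset='dataset',            # output directory for csv/joblib files (placeholder)
                  is_train=True,                # train and dump a new joblib bundle
                  is_fivemodels=False,          # use the full model list, not the saved top five
                  val_size=20,
                  test_size=20,
                  settings='pp_price_forecast', # run label used in the joblib filename (placeholder)
                  now=now,
                  etadata=None,                 # ETA client; only needed when is_update_eta is True
                  modelsindex={},               # model name -> ETA index code mapping (placeholder)
                  data={},                      # ETA push payload template (placeholder)
                  is_eta=False,
                  end_time=end_time)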