diff --git a/config_juxiting.py b/config_juxiting.py
index 4f82c48..ead3469 100644
--- a/config_juxiting.py
+++ b/config_juxiting.py
@@ -434,7 +434,7 @@ DEFAULT_CONFIG = {
 # Switches
 is_train = True  # whether to train
 is_debug = False  # whether to run in debug mode
-is_eta = True  # whether to use the ETA interface
+is_eta = False  # whether to use the ETA interface
 is_market = True  # whether to fetch features from the market information platform; only takes effect when is_eta is True
 is_timefurture = True  # whether to use time-derived features
 is_fivemodels = False  # whether to reuse the previously saved best five models
diff --git a/config_juxiting_yuedu.py b/config_juxiting_yuedu.py
index 4f1d92f..d3b6d46 100644
--- a/config_juxiting_yuedu.py
+++ b/config_juxiting_yuedu.py
@@ -460,7 +460,7 @@ DEFAULT_CONFIG = {
 # Switches
 is_train = True  # whether to train
 is_debug = False  # whether to run in debug mode
-is_eta = True  # whether to use the ETA interface
+is_eta = False  # whether to use the ETA interface
 is_market = True  # whether to fetch features from the market information platform; only takes effect when is_eta is True
 is_timefurture = True  # whether to use time-derived features
 is_fivemodels = False  # whether to reuse the previously saved best five models
diff --git a/lib/dataread.py b/lib/dataread.py
index ac42870..1f3852a 100644
--- a/lib/dataread.py
+++ b/lib/dataread.py
@@ -568,44 +568,66 @@ def feature_importance(X_train, y_train):
     temp = XGBRegressor()
     temp.fit(X_train, y_train)

-    ax = plot_importance(temp)
-    fig = ax.figure
-    fig.set_size_inches(8, 7)
+    # Get the feature importances from the trained model
+    importances = temp.feature_importances_
+    indices = np.argsort(importances)[::-1]  # sort by importance, descending

-    # Change the figure title to include the model name
-    title = '特征重要度1'  # replace with your model name
-    ax.set_title(title)
+    # Indices and importances of the top 10 features
+    top_indices = indices[:10]
+    top_importances = importances[top_indices]
+    top_features = [X_train.columns[i] for i in top_indices]

+    # Percentages relative to the total importance of all features
+    total_all_importance = sum(importances)  # sum of importances over all features
+    percentages = (top_importances / total_all_importance) * 100
+
+    # Plot the feature importances
+    plt.figure(figsize=(10, 8))
+    ax = plt.gca()
+
+    # Draw the horizontal bars
+    for i, (importance, percentage) in enumerate(zip(top_importances, percentages)):
+        ax.barh(i, importance, color='skyblue')
+        ax.text(importance + 0.01, i, f'{percentage:.1f}%', va='center')
+
+    # Set the y-axis labels and the title
+    ax.set_yticks(range(len(top_features)))
+    ax.set_yticklabels(top_features)
+    ax.set_xlabel('特征重要性')
+    ax.set_title('特征重要性排序(前10位)')
+
+    # Adjust the layout
+    plt.tight_layout()
     # plt.show()
     # Save the figure
     plt.savefig(os.path.join(config.dataset, '特征重要度1.png'))
     plt.close()
     config.logger.info('特征重要度1.png 已保存')

-    # Create an LGBMRegressor and train it
-    regressor = lgb.LGBMRegressor()
-    regressor.fit(X_train, y_train)
+    # # Create an LGBMRegressor and train it
+    # regressor = lgb.LGBMRegressor()
+    # regressor.fit(X_train, y_train)

-    # Set the figure size (optional)
-    plt.figure(figsize=(30, 40))
-    # Use plot_importance to draw the feature importances
-    # Note: in some LightGBM versions you can pass the model object directly
-    ax = importance_plot = lgb.plot_importance(
-        regressor, importance_type='gain')  # or 'split'
-    # Set the title and font sizes
-    ax.set_title('Feature Importance - LGBMRegressor', fontsize=12)
-    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
-                 ax.get_xticklabels() + ax.get_yticklabels()):
-        item.set_fontsize(9)  # font size for the x/y axis labels and tick labels
-    # Change the figure title to include the model name
-    # title = 'Feature Importance - LGBMRegressor '  # replace with your model name
-    # ax.set_title(title)
-    # Save the figure
-    plt.savefig(os.path.join(config.dataset, '特征重要度2.png'))
+    # # Set the figure size (optional)
+    # plt.figure(figsize=(30, 40))
+    # # Use plot_importance to draw the feature importances
+    # # Note: in some LightGBM versions you can pass the model object directly
+    # ax = importance_plot = lgb.plot_importance(
+    #     regressor, importance_type='gain')  # or 'split'
+    # # Set the title and font sizes
+    # ax.set_title('Feature Importance - LGBMRegressor', fontsize=12)
+    # for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
+    #              ax.get_xticklabels() + ax.get_yticklabels()):
+    #     item.set_fontsize(9)  # font size for the x/y axis labels and tick labels
+    # # Change the figure title to include the model name
+    # # title = 'Feature Importance - LGBMRegressor '  # replace with your model name
+    # # ax.set_title(title)
+    # # Save the figure
+    # plt.savefig(os.path.join(config.dataset, '特征重要度2.png'))

-    # Show the figure
-    plt.close()
-    config.logger.info('特征重要度2.png 已保存')
+    # # Show the figure
+    # plt.close()
+    # config.logger.info('特征重要度2.png 已保存')


 def corr_feature(df):
@@ -807,7 +829,7 @@ def calculate_kdj(data, n=9):

 def calculate_correlation(df):
     try:
-        yy = df['y']
+        yy = df['y'][-30:]
         # Drop ds and y
         df = df.drop(columns=['ds', 'y'])
         # Compute the correlation coefficients
@@ -1088,25 +1110,26 @@ def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_t
     df = df[df['ds'] <= end_time]
     config.logger.info(f'删除两月不更新特征前数据量:{df.shape}')
     # Drop columns whose latest observation is more than two months old, and columns that are constant over the last two months
-    # current_date = datetime.datetime.now()
-    # two_months_ago = current_date - timedelta(days=40)
+    current_date = datetime.datetime.strptime(
+        global_config['end_time'], '%Y-%m-%d')
+    two_months_ago = current_date - timedelta(days=40)

     # Check for features that have not been updated for two months
-    # def check_column(col_name):
-    #     if 'ds' in col_name or 'y' in col_name:
-    #         return False
-    #     df_check_column = df[['ds', col_name]]
-    #     df_check_column = df_check_column.dropna()
-    #     if len(df_check_column) == 0:
-    #         return True
-    #     if df_check_column[(df_check_column['ds'] >= two_months_ago)].groupby(col_name).ngroups < 2:
-    #         return True
-    #     corresponding_date = df_check_column.iloc[-1]['ds']
-    #     return corresponding_date < two_months_ago
-    # columns_to_drop = df.columns[df.columns.map(check_column)].tolist()
-    # df = df.drop(columns=columns_to_drop)
+    def check_column(col_name):
+        if 'ds' in col_name or 'y' in col_name:
+            return False
+        df_check_column = df[['ds', col_name]]
+        df_check_column = df_check_column.dropna()
+        if len(df_check_column) == 0:
+            return True
+        if df_check_column[(df_check_column['ds'] >= two_months_ago)].groupby(col_name).ngroups < 2:
+            return True
+        corresponding_date = df_check_column.iloc[-1]['ds']
+        return corresponding_date < two_months_ago
+    columns_to_drop = df.columns[df.columns.map(check_column)].tolist()
+    df = df.drop(columns=columns_to_drop)

-    # config.logger.info(f'删除两月不更新特征后数据量:{df.shape}')
+    config.logger.info(f'删除两月不更新特征后数据量:{df.shape}')

     # Derive time features
     if is_timefurture:
diff --git a/main_juxiting_yuedu.py b/main_juxiting_yuedu.py
index 58b56a1..0785249 100644
--- a/main_juxiting_yuedu.py
+++ b/main_juxiting_yuedu.py
@@ -94,26 +94,32 @@ global_config.update({
 def push_market_value():
     logger.info('发送预测结果到市场信息平台')
     current_end_time = global_config['end_time']
-    previous_trading_day = (pd.Timestamp(current_end_time) -
-                            pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d')
+    previous_trading_day = (pd.Timestamp(current_end_time) -
+                            pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d')
     # Read the prediction data and the model-evaluation data
     best_bdwd_price = find_best_models(
         date=previous_trading_day, global_config=global_config)
-
-    # Get the predicted prices from this month's best models
-    four_month_predict_price = pd.read_csv(os.path.join(global_config['dataset'], 'predict.csv'))
-    four_month_predict_price['ds'] = pd.to_datetime(four_month_predict_price['ds'])
+
+    # Get the predicted prices from this month's best models
+    four_month_predict_price = pd.read_csv(
+        os.path.join(global_config['dataset'], 'predict.csv'))
+    four_month_predict_price['ds'] = pd.to_datetime(
+        four_month_predict_price['ds'])
     # Set the index: 次月, 次二月, 次三月, 次四月 (one to four months ahead)
     index_labels = ["次月", "次二月", "次三月", "次四月"]
-    four_month_predict_price.index = index_labels
+    four_month_predict_price.index = index_labels
     global_config['logger'].info(f"best_bdwd_price: {best_bdwd_price}")
     # Prepare the data to be pushed
-    ciyue_mean = four_month_predict_price[best_bdwd_price['next_month_price']['model_name']].iloc[0]
-    cieryue_mean = four_month_predict_price[best_bdwd_price['next_february_price']['model_name']].iloc[1]
-    cisanyue_mean = four_month_predict_price[best_bdwd_price['next_march_price']['model_name']].iloc[2]
-    cisieryue_mean = four_month_predict_price[best_bdwd_price['next_april_price']['model_name']].iloc[3]
+    ciyue_mean = four_month_predict_price[best_bdwd_price['next_month_price']
+                                          ['model_name']].iloc[0]
+    cieryue_mean = four_month_predict_price[best_bdwd_price['next_february_price']
+                                            ['model_name']].iloc[1]
+    cisanyue_mean = four_month_predict_price[best_bdwd_price['next_march_price']
+                                             ['model_name']].iloc[2]
+    cisieryue_mean = four_month_predict_price[best_bdwd_price['next_april_price']
+                                              ['model_name']].iloc[3]
     # # Round to two decimal places
     ciyue_mean = round(ciyue_mean, 2)
     cieryue_mean = round(cieryue_mean, 2)
@@ -331,8 +337,8 @@ def predict_main():
     else:
         # Read the data
         logger.info('读取本地数据:' + os.path.join(dataset, data_set))
-        df, df_zhibiaoliebiao = getdata_zhoudu_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
-                                                        is_timefurture=is_timefurture, end_time=end_time)  # raw data, not yet processed
+        df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
+                                                 is_timefurture=is_timefurture, end_time=end_time)  # raw data, not yet processed

     # Rename the prediction target column
     df.rename(columns={y: 'y'}, inplace=True)
@@ -450,25 +456,25 @@ def predict_main():
     now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

     ex_Model_Juxiting(df,
-                      horizon=global_config['horizon'],
-                      input_size=global_config['input_size'],
-                      train_steps=global_config['train_steps'],
-                      val_check_steps=global_config['val_check_steps'],
-                      early_stop_patience_steps=global_config['early_stop_patience_steps'],
-                      is_debug=global_config['is_debug'],
-                      dataset=global_config['dataset'],
-                      is_train=global_config['is_train'],
-                      is_fivemodels=global_config['is_fivemodels'],
-                      val_size=global_config['val_size'],
-                      test_size=global_config['test_size'],
-                      settings=global_config['settings'],
-                      now=now,
-                      etadata=etadata,
-                      modelsindex=global_config['modelsindex'],
-                      data=data,
-                      is_eta=global_config['is_eta'],
-                      end_time=global_config['end_time'],
-                      )
+                      horizon=global_config['horizon'],
+                      input_size=global_config['input_size'],
+                      train_steps=global_config['train_steps'],
+                      val_check_steps=global_config['val_check_steps'],
+                      early_stop_patience_steps=global_config['early_stop_patience_steps'],
+                      is_debug=global_config['is_debug'],
+                      dataset=global_config['dataset'],
+                      is_train=global_config['is_train'],
+                      is_fivemodels=global_config['is_fivemodels'],
+                      val_size=global_config['val_size'],
+                      test_size=global_config['test_size'],
+                      settings=global_config['settings'],
+                      now=now,
+                      etadata=etadata,
+                      modelsindex=global_config['modelsindex'],
+                      data=data,
+                      is_eta=global_config['is_eta'],
+                      end_time=global_config['end_time'],
+                      )

     logger.info('模型训练完成')
diff --git a/models/nerulforcastmodels.py b/models/nerulforcastmodels.py
index 304b791..f279fe5 100644
--- a/models/nerulforcastmodels.py
+++ b/models/nerulforcastmodels.py
@@ -350,7 +350,10 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
     config.logger.info(df_test.head())

     # Feature importance
-    X_train = df_train.drop(columns=['y'])
+    X_train = df_train.drop(columns=['y', 'ds'])
+    if 'yearmonthweeks' in df_train.columns:
+        X_train = X_train.drop(columns=['yearmonthweeks'])
+
     y_train = df_train['y']

     feature_importance(X_train=X_train, y_train=y_train)
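
Note on the new top-10 chart in feature_importance: the percentage printed next to each bar is computed against the sum of importances over all features, not just the ten that are plotted, so the labels on the chart generally add up to less than 100%. A minimal sketch of that arithmetic with made-up importance values (the array below is hypothetical, not project data):

import numpy as np

# 15 hypothetical feature importances that sum to 1.0, as XGBoost's
# feature_importances_ normally do.
importances = np.array([0.30, 0.20, 0.10, 0.08, 0.07, 0.06, 0.05, 0.04,
                        0.03, 0.02, 0.02, 0.01, 0.01, 0.005, 0.005])
indices = np.argsort(importances)[::-1]          # descending order
top_importances = importances[indices[:10]]      # only the top 10 are plotted
percentages = top_importances / importances.sum() * 100
print(round(percentages.sum(), 1))  # 95.0 -- the missing 5% belongs to features not shown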
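
The filter re-enabled in datachuli_juxiting drops any feature column whose last non-null observation is more than 40 days before end_time, plus columns that are effectively constant over that recent window. A self-contained sketch of the same check on a toy DataFrame (the column names, dates, and end_time below are invented for illustration):

import datetime
import pandas as pd

# Toy frame: 'stale' stops updating at the end of January, 'fresh' keeps updating.
df = pd.DataFrame({
    'ds': pd.date_range('2025-01-01', periods=120, freq='D'),
    'y': range(120),
    'fresh': range(120),
    'stale': [1.0] * 30 + [None] * 90,
})

end_time = '2025-04-30'  # hypothetical forecast end date
two_months_ago = datetime.datetime.strptime(end_time, '%Y-%m-%d') - datetime.timedelta(days=40)


def check_column(col_name):
    """Return True if the column should be dropped (stale or constant)."""
    if 'ds' in col_name or 'y' in col_name:
        return False
    col = df[['ds', col_name]].dropna()
    if len(col) == 0:
        return True
    # fewer than two distinct values in the recent window -> drop
    if col[col['ds'] >= two_months_ago].groupby(col_name).ngroups < 2:
        return True
    # last observation older than the cutoff -> drop
    return col.iloc[-1]['ds'] < two_months_ago


columns_to_drop = df.columns[df.columns.map(check_column)].tolist()
print(columns_to_drop)  # expected: ['stale']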
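
In push_market_value, predict.csv is re-indexed to the four forecast months and each horizon reads its own best model's column at its own row position. A sketch of that selection under assumed data: only the best_bdwd_price structure and the 次月 through 次四月 index come from the code above, while the model names 'NHITS' and 'TFT' and all prices are placeholders.

import pandas as pd

# Hypothetical stand-ins for predict.csv and the find_best_models() output.
four_month_predict_price = pd.DataFrame({
    'ds': pd.to_datetime(['2025-05-31', '2025-06-30', '2025-07-31', '2025-08-31']),
    'NHITS': [7100.0, 7150.0, 7200.0, 7250.0],
    'TFT': [7080.0, 7160.0, 7210.0, 7300.0],
}, index=["次月", "次二月", "次三月", "次四月"])

best_bdwd_price = {
    'next_month_price': {'model_name': 'NHITS'},
    'next_february_price': {'model_name': 'TFT'},
}

# Each horizon picks its best model's column, then its own row by position.
ciyue_mean = round(four_month_predict_price[best_bdwd_price['next_month_price']['model_name']].iloc[0], 2)
cieryue_mean = round(four_month_predict_price[best_bdwd_price['next_february_price']['model_name']].iloc[1], 2)
print(ciyue_mean, cieryue_mean)  # 7100.0 7160.0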