Polyolefin monthly forecast data processing
parent b8ca24e07b
commit 8a2cf77639
@@ -434,7 +434,7 @@ DEFAULT_CONFIG = {
 # 开关
 is_train = True  # 是否训练
 is_debug = False  # 是否调试
-is_eta = True  # 是否使用eta接口
+is_eta = False  # 是否使用eta接口
 is_market = True  # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效
 is_timefurture = True  # 是否使用时间特征
 is_fivemodels = False  # 是否使用之前保存的最佳的5个模型

@@ -460,7 +460,7 @@ DEFAULT_CONFIG = {
 # 开关
 is_train = True  # 是否训练
 is_debug = False  # 是否调试
-is_eta = True  # 是否使用eta接口
+is_eta = False  # 是否使用eta接口
 is_market = True  # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效
 is_timefurture = True  # 是否使用时间特征
 is_fivemodels = False  # 是否使用之前保存的最佳的5个模型
lib/dataread.py | 113
@@ -568,44 +568,66 @@ def feature_importance(X_train, y_train):
     temp = XGBRegressor()
     temp.fit(X_train, y_train)

-    ax = plot_importance(temp)
-    fig = ax.figure
-    fig.set_size_inches(8, 7)
+    # 获取特征重要性
+    importances = temp.feature_importances_
+    indices = np.argsort(importances)[::-1]  # 按重要性降序排列

-    # 修改图的标题,添加模型名称
-    title = '特征重要度1'  # 替换为你的模型名称
-    ax.set_title(title)
+    # 获取前10个特征的索引和重要性
+    top_indices = indices[:10]
+    top_importances = importances[top_indices]
+    top_features = [X_train.columns[i] for i in top_indices]

+    # 计算百分比(相对于所有特征的总和)
+    total_all_importance = sum(importances)  # 所有特征的重要性总和
+    percentages = (top_importances / total_all_importance) * 100
+
+    # 绘制特征重要性图
+    plt.figure(figsize=(10, 8))
+    ax = plt.gca()
+
+    # 绘制条形图
+    for i, (importance, percentage) in enumerate(zip(top_importances, percentages)):
+        ax.barh(i, importance, color='skyblue')
+        ax.text(importance + 0.01, i, f'{percentage:.1f}%', va='center')
+
+    # 设置y轴标签和标题
+    ax.set_yticks(range(len(top_features)))
+    ax.set_yticklabels(top_features)
+    ax.set_xlabel('特征重要性')
+    ax.set_title('特征重要性排序(前10位)')
+
+    # 调整布局并显示
+    plt.tight_layout()
     # plt.show()
     # 保存图片
     plt.savefig(os.path.join(config.dataset, '特征重要度1.png'))
     plt.close()
     config.logger.info('特征重要度1.png 已保存')

-    # 创建一个 LGBMRegressor 对象并训练模型
-    regressor = lgb.LGBMRegressor()
-    regressor.fit(X_train, y_train)
+    # # 创建一个 LGBMRegressor 对象并训练模型
+    # regressor = lgb.LGBMRegressor()
+    # regressor.fit(X_train, y_train)

-    # 设置图形大小(可选)
-    plt.figure(figsize=(30, 40))
-    # 使用 plot_importance 函数来绘制特征重要性
-    # 注意:在一些版本的 LightGBM 中,你可以直接传入模型对象
-    ax = importance_plot = lgb.plot_importance(
-        regressor, importance_type='gain')  # 或者 'split'
-    # 设置标题和字体大小
-    ax.set_title('Feature Importance - LGBMRegressor', fontsize=12)
-    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
-                 ax.get_xticklabels() + ax.get_yticklabels()):
-        item.set_fontsize(9)  # 设置 x 轴、y 轴标签以及刻度标签的字体大小
-    # 修改图的标题,添加模型名称
-    # title = 'Feature Importance - LGBMRegressor '  # 替换为你的模型名称
-    # ax.set_title(title)
-    # 保存图片
-    plt.savefig(os.path.join(config.dataset, '特征重要度2.png'))
+    # # 设置图形大小(可选)
+    # plt.figure(figsize=(30, 40))
+    # # 使用 plot_importance 函数来绘制特征重要性
+    # # 注意:在一些版本的 LightGBM 中,你可以直接传入模型对象
+    # ax = importance_plot = lgb.plot_importance(
+    #     regressor, importance_type='gain')  # 或者 'split'
+    # # 设置标题和字体大小
+    # ax.set_title('Feature Importance - LGBMRegressor', fontsize=12)
+    # for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
+    #              ax.get_xticklabels() + ax.get_yticklabels()):
+    #     item.set_fontsize(9)  # 设置 x 轴、y 轴标签以及刻度标签的字体大小
+    # # 修改图的标题,添加模型名称
+    # # title = 'Feature Importance - LGBMRegressor '  # 替换为你的模型名称
+    # # ax.set_title(title)
+    # # 保存图片
+    # plt.savefig(os.path.join(config.dataset, '特征重要度2.png'))

-    # 显示图形
-    plt.close()
-    config.logger.info('特征重要度2.png 已保存')
+    # # 显示图形
+    # plt.close()
+    # config.logger.info('特征重要度2.png 已保存')


 def corr_feature(df):
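A minimal standalone sketch of the top-10 selection and percentage logic introduced in this hunk; the importance values and column names below are invented for illustration, and the model is only assumed to expose feature_importances_ the way XGBRegressor does:

import numpy as np

# toy stand-in for temp.feature_importances_ after fitting (hypothetical values)
importances = np.array([0.05, 0.40, 0.15, 0.30, 0.10])
columns = ['macd', 'kdj_k', 'brent', 'cny_usd', 'pp_futures']  # hypothetical names

indices = np.argsort(importances)[::-1]      # indices sorted by descending importance
top_indices = indices[:10]                   # at most the 10 strongest features
top_importances = importances[top_indices]
top_features = [columns[i] for i in top_indices]

# share of each top feature relative to the sum over *all* features
percentages = top_importances / importances.sum() * 100
print(list(zip(top_features, percentages.round(1))))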
@@ -807,7 +829,7 @@ def calculate_kdj(data, n=9):

 def calculate_correlation(df):
     try:
-        yy = df['y']
+        yy = df['y'][-30:]
         # 去掉ds y
         df = df.drop(columns=['ds', 'y'])
         # 计算相关系数
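The change above means the correlation is now computed against only the last 30 observations of y. A rough sketch of that idea on toy data; the corrwith call and column names are illustrative assumptions, not the exact code that follows inside calculate_correlation:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    'ds': pd.date_range('2025-01-01', periods=90, freq='D'),
    'y': rng.normal(size=90).cumsum(),
    'feature_a': rng.normal(size=90).cumsum(),   # hypothetical indicator
})

yy = df['y'].iloc[-30:]                          # same effect as df['y'][-30:]
features = df.drop(columns=['ds', 'y'])
print(features.iloc[-30:].corrwith(yy))          # correlation over the recent window only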
@@ -1088,25 +1110,26 @@ def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_t
     df = df[df['ds'] <= end_time]
     config.logger.info(f'删除两月不更新特征前数据量:{df.shape}')
     # 去掉近最后数据对应的日期在两月以前的列,删除近2月的数据是常数的列
-    # current_date = datetime.datetime.now()
-    # two_months_ago = current_date - timedelta(days=40)
+    current_date = datetime.datetime.strptime(
+        global_config['end_time'], '%Y-%m-%d')
+    two_months_ago = current_date - timedelta(days=40)
     # 检查两月不更新的特征

-    # def check_column(col_name):
-    #     if 'ds' in col_name or 'y' in col_name:
-    #         return False
-    #     df_check_column = df[['ds', col_name]]
-    #     df_check_column = df_check_column.dropna()
-    #     if len(df_check_column) == 0:
-    #         return True
-    #     if df_check_column[(df_check_column['ds'] >= two_months_ago)].groupby(col_name).ngroups < 2:
-    #         return True
-    #     corresponding_date = df_check_column.iloc[-1]['ds']
-    #     return corresponding_date < two_months_ago
-    # columns_to_drop = df.columns[df.columns.map(check_column)].tolist()
-    # df = df.drop(columns=columns_to_drop)
+    def check_column(col_name):
+        if 'ds' in col_name or 'y' in col_name:
+            return False
+        df_check_column = df[['ds', col_name]]
+        df_check_column = df_check_column.dropna()
+        if len(df_check_column) == 0:
+            return True
+        if df_check_column[(df_check_column['ds'] >= two_months_ago)].groupby(col_name).ngroups < 2:
+            return True
+        corresponding_date = df_check_column.iloc[-1]['ds']
+        return corresponding_date < two_months_ago
+    columns_to_drop = df.columns[df.columns.map(check_column)].tolist()
+    df = df.drop(columns=columns_to_drop)

-    # config.logger.info(f'删除两月不更新特征后数据量:{df.shape}')
+    config.logger.info(f'删除两月不更新特征后数据量:{df.shape}')

     # 衍生时间特征
     if is_timefurture:
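A self-contained illustration of the stale-feature filter that this hunk re-enables, run on a toy frame; end_time and the column names are made-up placeholders:

import datetime
from datetime import timedelta

import pandas as pd

end_time = '2025-01-31'                          # stand-in for global_config['end_time']
current_date = datetime.datetime.strptime(end_time, '%Y-%m-%d')
two_months_ago = current_date - timedelta(days=40)

df = pd.DataFrame({
    'ds': pd.date_range('2024-10-01', periods=120, freq='D'),
    'y': range(120),
    'fresh_feature': range(120),                 # keeps updating through end_time
    'stale_feature': [1.0] * 60 + [None] * 60,   # last valid value is ~2 months old
})

def check_column(col_name):
    # True means the column should be dropped
    if 'ds' in col_name or 'y' in col_name:
        return False
    col = df[['ds', col_name]].dropna()
    if len(col) == 0:
        return True
    # constant (or empty) over the last 40 days, or last observation older than 40 days
    if col[col['ds'] >= two_months_ago].groupby(col_name).ngroups < 2:
        return True
    return col.iloc[-1]['ds'] < two_months_ago

print(df.columns[df.columns.map(check_column)].tolist())  # ['stale_feature']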
@@ -94,26 +94,32 @@ global_config.update({
 def push_market_value():
     logger.info('发送预测结果到市场信息平台')
     current_end_time = global_config['end_time']
-    previous_trading_day = (pd.Timestamp(current_end_time) - 
-                           pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d')
+    previous_trading_day = (pd.Timestamp(current_end_time) -
+                            pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d')

     # 读取预测数据和模型评估数据
     best_bdwd_price = find_best_models(
         date=previous_trading_day, global_config=global_config)
-    
-     # 获取本月最佳模型的预测价格
-    four_month_predict_price = pd.read_csv(os.path.join(global_config['dataset'], 'predict.csv'))
-    four_month_predict_price['ds'] = pd.to_datetime(four_month_predict_price['ds'])
+
+    # 获取本月最佳模型的预测价格
+    four_month_predict_price = pd.read_csv(
+        os.path.join(global_config['dataset'], 'predict.csv'))
+    four_month_predict_price['ds'] = pd.to_datetime(
+        four_month_predict_price['ds'])
     # 设置索引 次月 次二月 次三月 次四月
     index_labels = ["次月", "次二月", "次三月", "次四月"]
-    four_month_predict_price.index = index_labels 
+    four_month_predict_price.index = index_labels
     global_config['logger'].info(f"best_bdwd_price: {best_bdwd_price}")

     # 准备要推送的数据
-    ciyue_mean = four_month_predict_price[best_bdwd_price['next_month_price']['model_name']].iloc[0]
-    cieryue_mean = four_month_predict_price[best_bdwd_price['next_february_price']['model_name']].iloc[1]
-    cisanyue_mean = four_month_predict_price[best_bdwd_price['next_march_price']['model_name']].iloc[2]
-    cisieryue_mean = four_month_predict_price[best_bdwd_price['next_april_price']['model_name']].iloc[3]
+    ciyue_mean = four_month_predict_price[best_bdwd_price['next_month_price']
+                                          ['model_name']].iloc[0]
+    cieryue_mean = four_month_predict_price[best_bdwd_price['next_february_price']
+                                            ['model_name']].iloc[1]
+    cisanyue_mean = four_month_predict_price[best_bdwd_price['next_march_price']
+                                             ['model_name']].iloc[2]
+    cisieryue_mean = four_month_predict_price[best_bdwd_price['next_april_price']
+                                              ['model_name']].iloc[3]
     # # 保留两位小数
     ciyue_mean = round(ciyue_mean, 2)
     cieryue_mean = round(cieryue_mean, 2)
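For context on the previous_trading_day arithmetic kept in this hunk: pandas' BusinessDay offset only skips weekends, not exchange holidays. A tiny sketch with an assumed date:

import pandas as pd

end_time = '2025-06-23'  # a Monday, standing in for global_config['end_time']
previous_trading_day = (pd.Timestamp(end_time) -
                        pd.tseries.offsets.BusinessDay(1)).strftime('%Y-%m-%d')
print(previous_trading_day)  # '2025-06-20', the preceding Friday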
@@ -331,8 +337,8 @@ def predict_main():
     else:
         # 读取数据
         logger.info('读取本地数据:' + os.path.join(dataset, data_set))
-        df, df_zhibiaoliebiao = getdata_zhoudu_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
-                                                        is_timefurture=is_timefurture, end_time=end_time)  # 原始数据,未处理
+        df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
+                                                 is_timefurture=is_timefurture, end_time=end_time)  # 原始数据,未处理

     # 更改预测列名称
     df.rename(columns={y: 'y'}, inplace=True)
@@ -450,25 +456,25 @@ def predict_main():

     now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
     ex_Model_Juxiting(df,
-             horizon=global_config['horizon'],
-             input_size=global_config['input_size'],
-             train_steps=global_config['train_steps'],
-             val_check_steps=global_config['val_check_steps'],
-             early_stop_patience_steps=global_config['early_stop_patience_steps'],
-             is_debug=global_config['is_debug'],
-             dataset=global_config['dataset'],
-             is_train=global_config['is_train'],
-             is_fivemodels=global_config['is_fivemodels'],
-             val_size=global_config['val_size'],
-             test_size=global_config['test_size'],
-             settings=global_config['settings'],
-             now=now,
-             etadata=etadata,
-             modelsindex=global_config['modelsindex'],
-             data=data,
-             is_eta=global_config['is_eta'],
-             end_time=global_config['end_time'],
-             )
+                      horizon=global_config['horizon'],
+                      input_size=global_config['input_size'],
+                      train_steps=global_config['train_steps'],
+                      val_check_steps=global_config['val_check_steps'],
+                      early_stop_patience_steps=global_config['early_stop_patience_steps'],
+                      is_debug=global_config['is_debug'],
+                      dataset=global_config['dataset'],
+                      is_train=global_config['is_train'],
+                      is_fivemodels=global_config['is_fivemodels'],
+                      val_size=global_config['val_size'],
+                      test_size=global_config['test_size'],
+                      settings=global_config['settings'],
+                      now=now,
+                      etadata=etadata,
+                      modelsindex=global_config['modelsindex'],
+                      data=data,
+                      is_eta=global_config['is_eta'],
+                      end_time=global_config['end_time'],
+                      )

     logger.info('模型训练完成')

@@ -350,7 +350,10 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
     config.logger.info(df_test.head())

     # 特征重要度
-    X_train = df_train.drop(columns=['y'])
+    X_train = df_train.drop(columns=['y', 'ds'])
+    if 'yearmonthweeks' in df_train.columns:
+        X_train = X_train.drop(columns=['yearmonthweeks'])
+
     y_train = df_train['y']
     feature_importance(X_train=X_train, y_train=y_train)

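A compact alternative for the same column filtering, assuming df_train is a pandas DataFrame; this is just a sketch of the idea with a toy frame, not the committed code:

import pandas as pd

df_train = pd.DataFrame({                      # toy stand-in for the training frame
    'ds': pd.date_range('2025-01-01', periods=5, freq='D'),
    'y': [1.0, 2.0, 3.0, 4.0, 5.0],
    'yearmonthweeks': [202501] * 5,
    'feature_a': [0.1, 0.2, 0.3, 0.4, 0.5],
})

# drop target, timestamp, and the optional weekly index column in one pass
drop_cols = [c for c in ('y', 'ds', 'yearmonthweeks') if c in df_train.columns]
X_train = df_train.drop(columns=drop_cols)
y_train = df_train['y']
print(X_train.columns.tolist())                # ['feature_a']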