聚烯烃月度预测数据处理
This commit is contained in:
		
							parent
							
								
									b8ca24e07b
								
							
						
					
					
						commit
						8a2cf77639
					
				| @ -434,7 +434,7 @@ DEFAULT_CONFIG = { | ||||
| # 开关 | ||||
| is_train = True  # 是否训练 | ||||
| is_debug = False  # 是否调试 | ||||
| is_eta = True  # 是否使用eta接口 | ||||
| is_eta = False  # 是否使用eta接口 | ||||
| is_market = True  # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 | ||||
| is_timefurture = True  # 是否使用时间特征 | ||||
| is_fivemodels = False  # 是否使用之前保存的最佳的5个模型 | ||||
|  | ||||
| @ -460,7 +460,7 @@ DEFAULT_CONFIG = { | ||||
| # 开关 | ||||
| is_train = True  # 是否训练 | ||||
| is_debug = False  # 是否调试 | ||||
| is_eta = True  # 是否使用eta接口 | ||||
| is_eta = False  # 是否使用eta接口 | ||||
| is_market = True  # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 | ||||
| is_timefurture = True  # 是否使用时间特征 | ||||
| is_fivemodels = False  # 是否使用之前保存的最佳的5个模型 | ||||
|  | ||||
							
								
								
									
										113
									
								
								lib/dataread.py
									
									
									
									
									
								
							
							
						
						
									
										113
									
								
								lib/dataread.py
									
									
									
									
									
								
							| @ -568,44 +568,66 @@ def feature_importance(X_train, y_train): | ||||
|     temp = XGBRegressor() | ||||
|     temp.fit(X_train, y_train) | ||||
| 
 | ||||
|     ax = plot_importance(temp) | ||||
|     fig = ax.figure | ||||
|     fig.set_size_inches(8, 7) | ||||
|     # 获取特征重要性 | ||||
|     importances = temp.feature_importances_ | ||||
|     indices = np.argsort(importances)[::-1]  # 按重要性降序排列 | ||||
| 
 | ||||
|     # 修改图的标题,添加模型名称 | ||||
|     title = '特征重要度1'  # 替换为你的模型名称 | ||||
|     ax.set_title(title) | ||||
|     # 获取前10个特征的索引和重要性 | ||||
|     top_indices = indices[:10] | ||||
|     top_importances = importances[top_indices] | ||||
|     top_features = [X_train.columns[i] for i in top_indices] | ||||
| 
 | ||||
|     # 计算百分比(相对于所有特征的总和) | ||||
|     total_all_importance = sum(importances)  # 所有特征的重要性总和 | ||||
|     percentages = (top_importances / total_all_importance) * 100 | ||||
| 
 | ||||
|     # 绘制特征重要性图 | ||||
|     plt.figure(figsize=(10, 8)) | ||||
|     ax = plt.gca() | ||||
| 
 | ||||
|     # 绘制条形图 | ||||
|     for i, (importance, percentage) in enumerate(zip(top_importances, percentages)): | ||||
|         ax.barh(i, importance, color='skyblue') | ||||
|         ax.text(importance + 0.01, i, f'{percentage:.1f}%', va='center') | ||||
| 
 | ||||
|     # 设置y轴标签和标题 | ||||
|     ax.set_yticks(range(len(top_features))) | ||||
|     ax.set_yticklabels(top_features) | ||||
|     ax.set_xlabel('特征重要性') | ||||
|     ax.set_title('特征重要性排序(前10位)') | ||||
| 
 | ||||
|     # 调整布局并显示 | ||||
|     plt.tight_layout() | ||||
|     # plt.show() | ||||
|     # 保存图片 | ||||
|     plt.savefig(os.path.join(config.dataset, '特征重要度1.png')) | ||||
|     plt.close() | ||||
|     config.logger.info('特征重要度1.png 已保存') | ||||
| 
 | ||||
|     # 创建一个 LGBMRegressor 对象并训练模型 | ||||
|     regressor = lgb.LGBMRegressor() | ||||
|     regressor.fit(X_train, y_train) | ||||
|     # # 创建一个 LGBMRegressor 对象并训练模型 | ||||
|     # regressor = lgb.LGBMRegressor() | ||||
|     # regressor.fit(X_train, y_train) | ||||
| 
 | ||||
|     # 设置图形大小(可选) | ||||
|     plt.figure(figsize=(30, 40)) | ||||
|     # 使用 plot_importance 函数来绘制特征重要性 | ||||
|     # 注意:在一些版本的 LightGBM 中,你可以直接传入模型对象 | ||||
|     ax = importance_plot = lgb.plot_importance( | ||||
|         regressor, importance_type='gain')  # 或者 'split' | ||||
|     # 设置标题和字体大小 | ||||
|     ax.set_title('Feature Importance - LGBMRegressor', fontsize=12) | ||||
|     for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] + | ||||
|                  ax.get_xticklabels() + ax.get_yticklabels()): | ||||
|         item.set_fontsize(9)  # 设置 x 轴、y 轴标签以及刻度标签的字体大小 | ||||
|     # 修改图的标题,添加模型名称 | ||||
|     # title = 'Feature Importance - LGBMRegressor '  # 替换为你的模型名称 | ||||
|     # ax.set_title(title) | ||||
|     # 保存图片 | ||||
|     plt.savefig(os.path.join(config.dataset, '特征重要度2.png')) | ||||
|     # # 设置图形大小(可选) | ||||
|     # plt.figure(figsize=(30, 40)) | ||||
|     # # 使用 plot_importance 函数来绘制特征重要性 | ||||
|     # # 注意:在一些版本的 LightGBM 中,你可以直接传入模型对象 | ||||
|     # ax = importance_plot = lgb.plot_importance( | ||||
|     #     regressor, importance_type='gain')  # 或者 'split' | ||||
|     # # 设置标题和字体大小 | ||||
|     # ax.set_title('Feature Importance - LGBMRegressor', fontsize=12) | ||||
|     # for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] + | ||||
|     #              ax.get_xticklabels() + ax.get_yticklabels()): | ||||
|     #     item.set_fontsize(9)  # 设置 x 轴、y 轴标签以及刻度标签的字体大小 | ||||
|     # # 修改图的标题,添加模型名称 | ||||
|     # # title = 'Feature Importance - LGBMRegressor '  # 替换为你的模型名称 | ||||
|     # # ax.set_title(title) | ||||
|     # # 保存图片 | ||||
|     # plt.savefig(os.path.join(config.dataset, '特征重要度2.png')) | ||||
| 
 | ||||
|     # 显示图形 | ||||
|     plt.close() | ||||
|     config.logger.info('特征重要度2.png 已保存') | ||||
|     # # 显示图形 | ||||
|     # plt.close() | ||||
|     # config.logger.info('特征重要度2.png 已保存') | ||||
| 
 | ||||
| 
 | ||||
| def corr_feature(df): | ||||
| @ -807,7 +829,7 @@ def calculate_kdj(data, n=9): | ||||
| 
 | ||||
| def calculate_correlation(df): | ||||
|     try: | ||||
|         yy = df['y'] | ||||
|         yy = df['y'][-30:] | ||||
|         # 去掉ds y | ||||
|         df = df.drop(columns=['ds', 'y']) | ||||
|         # 计算相关系数 | ||||
| @ -1088,25 +1110,26 @@ def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_t | ||||
|     df = df[df['ds'] <= end_time] | ||||
|     config.logger.info(f'删除两月不更新特征前数据量:{df.shape}') | ||||
|     # 去掉近最后数据对应的日期在两月以前的列,删除近2月的数据是常数的列 | ||||
|     # current_date = datetime.datetime.now() | ||||
|     # two_months_ago = current_date - timedelta(days=40) | ||||
|     current_date = datetime.datetime.strptime( | ||||
|         global_config['end_time'], '%Y-%m-%d') | ||||
|     two_months_ago = current_date - timedelta(days=40) | ||||
|     # 检查两月不更新的特征 | ||||
| 
 | ||||
|     # def check_column(col_name): | ||||
|     #     if 'ds' in col_name or 'y' in col_name: | ||||
|     #         return False | ||||
|     #     df_check_column = df[['ds', col_name]] | ||||
|     #     df_check_column = df_check_column.dropna() | ||||
|     #     if len(df_check_column) == 0: | ||||
|     #         return True | ||||
|     #     if df_check_column[(df_check_column['ds'] >= two_months_ago)].groupby(col_name).ngroups < 2: | ||||
|     #         return True | ||||
|     #     corresponding_date = df_check_column.iloc[-1]['ds'] | ||||
|     #     return corresponding_date < two_months_ago | ||||
|     # columns_to_drop = df.columns[df.columns.map(check_column)].tolist() | ||||
|     # df = df.drop(columns=columns_to_drop) | ||||
|     def check_column(col_name): | ||||
|         if 'ds' in col_name or 'y' in col_name: | ||||
|             return False | ||||
|         df_check_column = df[['ds', col_name]] | ||||
|         df_check_column = df_check_column.dropna() | ||||
|         if len(df_check_column) == 0: | ||||
|             return True | ||||
|         if df_check_column[(df_check_column['ds'] >= two_months_ago)].groupby(col_name).ngroups < 2: | ||||
|             return True | ||||
|         corresponding_date = df_check_column.iloc[-1]['ds'] | ||||
|         return corresponding_date < two_months_ago | ||||
|     columns_to_drop = df.columns[df.columns.map(check_column)].tolist() | ||||
|     df = df.drop(columns=columns_to_drop) | ||||
| 
 | ||||
|     # config.logger.info(f'删除两月不更新特征后数据量:{df.shape}') | ||||
|     config.logger.info(f'删除两月不更新特征后数据量:{df.shape}') | ||||
| 
 | ||||
|     # 衍生时间特征 | ||||
|     if is_timefurture: | ||||
|  | ||||
| @ -102,18 +102,24 @@ def push_market_value(): | ||||
|         date=previous_trading_day, global_config=global_config) | ||||
| 
 | ||||
|     # 获取本月最佳模型的预测价格 | ||||
|     four_month_predict_price = pd.read_csv(os.path.join(global_config['dataset'], 'predict.csv')) | ||||
|     four_month_predict_price['ds'] = pd.to_datetime(four_month_predict_price['ds']) | ||||
|     four_month_predict_price = pd.read_csv( | ||||
|         os.path.join(global_config['dataset'], 'predict.csv')) | ||||
|     four_month_predict_price['ds'] = pd.to_datetime( | ||||
|         four_month_predict_price['ds']) | ||||
|     # 设置索引 次月 次二月 次三月 次四月 | ||||
|     index_labels = ["次月", "次二月", "次三月", "次四月"] | ||||
|     four_month_predict_price.index = index_labels | ||||
|     global_config['logger'].info(f"best_bdwd_price: {best_bdwd_price}") | ||||
| 
 | ||||
|     # 准备要推送的数据 | ||||
|     ciyue_mean = four_month_predict_price[best_bdwd_price['next_month_price']['model_name']].iloc[0] | ||||
|     cieryue_mean = four_month_predict_price[best_bdwd_price['next_february_price']['model_name']].iloc[1] | ||||
|     cisanyue_mean = four_month_predict_price[best_bdwd_price['next_march_price']['model_name']].iloc[2] | ||||
|     cisieryue_mean = four_month_predict_price[best_bdwd_price['next_april_price']['model_name']].iloc[3] | ||||
|     ciyue_mean = four_month_predict_price[best_bdwd_price['next_month_price'] | ||||
|                                           ['model_name']].iloc[0] | ||||
|     cieryue_mean = four_month_predict_price[best_bdwd_price['next_february_price'] | ||||
|                                             ['model_name']].iloc[1] | ||||
|     cisanyue_mean = four_month_predict_price[best_bdwd_price['next_march_price'] | ||||
|                                              ['model_name']].iloc[2] | ||||
|     cisieryue_mean = four_month_predict_price[best_bdwd_price['next_april_price'] | ||||
|                                               ['model_name']].iloc[3] | ||||
|     # # 保留两位小数 | ||||
|     ciyue_mean = round(ciyue_mean, 2) | ||||
|     cieryue_mean = round(cieryue_mean, 2) | ||||
| @ -331,7 +337,7 @@ def predict_main(): | ||||
|     else: | ||||
|         # 读取数据 | ||||
|         logger.info('读取本地数据:' + os.path.join(dataset, data_set)) | ||||
|         df, df_zhibiaoliebiao = getdata_zhoudu_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, | ||||
|         df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, | ||||
|                                                  is_timefurture=is_timefurture, end_time=end_time)  # 原始数据,未处理 | ||||
| 
 | ||||
|     # 更改预测列名称 | ||||
|  | ||||
| @ -350,7 +350,10 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear | ||||
|     config.logger.info(df_test.head()) | ||||
| 
 | ||||
|     # 特征重要度 | ||||
|     X_train = df_train.drop(columns=['y']) | ||||
|     X_train = df_train.drop(columns=['y', 'ds']) | ||||
|     if 'yearmonthweeks' in df_train.columns: | ||||
|         X_train = df_train.drop(columns=['yearmonthweeks']) | ||||
| 
 | ||||
|     y_train = df_train['y'] | ||||
|     feature_importance(X_train=X_train, y_train=y_train) | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user