删除逻辑新增去掉相关性小于0.6的特征

This commit is contained in:
workpc 2024-12-11 16:53:30 +08:00
parent 3937d9a4ea
commit 636c6cdc14
2 changed files with 9 additions and 3 deletions

View File

@ -232,8 +232,8 @@ is_fivemodels = False # 是否使用之前保存的最佳的5个模型
is_edbcode = False # use the features listed in edbcoding
is_edbnamelist = False # custom features, corresponding to the edbnamelist above
is_update_eta = False # upload the prediction results to eta
is_update_report = True # whether to upload the report
is_update_warning_data = True # whether to upload the warning data
is_update_report = False # whether to upload the report
is_update_warning_data = False # whether to upload the warning data
# 连接到数据库

View File

@ -652,7 +652,7 @@ def check_column(df,col_name,two_months_ago):
'''
if 'ds' in col_name or 'y' in col_name:
return False
df_check_column = df[['ds',col_name]]
df_check_column = df[['ds',col_name,'y']]
df_check_column = df_check_column.dropna()
if len(df_check_column) == 0:
@ -662,6 +662,12 @@ def check_column(df,col_name,two_months_ago):
if df_check_column[(df_check_column['ds']>= two_months_ago)].groupby(col_name).ngroups < 2:
print(f'180没有更新{col_name}')
return True
# Flag the feature for removal when the absolute correlation with 'y' is below 0.6
if abs(df_check_column[col_name].corr(df_check_column['y'])) < 0.6:
print(f'相关系数小于0.6{col_name}')
return True
corresponding_date = df_check_column.iloc[-1]['ds']
return corresponding_date < two_months_ago