根据预测列判断预警特征并删除预警的特征
This commit is contained in:
parent
e2380f5615
commit
da929edbac
@ -168,7 +168,7 @@ upload_data = {
|
||||
### 开关
|
||||
is_train = True # 是否训练
|
||||
is_debug = False # 是否调试
|
||||
is_eta = True # 是否使用eta接口
|
||||
is_eta = False # 是否使用eta接口
|
||||
is_timefurture = True # 是否使用时间特征
|
||||
is_fivemodels = False # 是否使用之前保存的最佳的5个模型
|
||||
is_edbcode = False # 特征使用edbcoding列表中的
|
||||
|
Binary file not shown.
@ -463,6 +463,8 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
||||
# 保存每列的最后更新时间到文件
|
||||
last_update_times_df = pd.DataFrame(columns = ['feature', 'last_update_time','is_value','update_period','warning_date'])
|
||||
|
||||
|
||||
|
||||
# 打印每列的最后更新时间
|
||||
for column, last_update_time in last_update_times.items():
|
||||
values = []
|
||||
@ -476,7 +478,7 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
||||
# 计算预警日期
|
||||
time_diff = (df1[column].dropna().index.to_series().diff().mode()[0]).total_seconds() / 3600 / 24
|
||||
from datetime import timedelta
|
||||
early_warning_date = datetime.datetime.strptime(last_update_time, '%Y-%m-%d') + timedelta(days=time_diff)*2
|
||||
early_warning_date = datetime.datetime.strptime(last_update_time, '%Y-%m-%d') + timedelta(days=time_diff)*2 + timedelta(days=1)
|
||||
early_warning_date = early_warning_date.strftime('%Y-%m-%d')
|
||||
except KeyError:
|
||||
time_diff = 0
|
||||
@ -489,15 +491,25 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
||||
last_update_times_df.to_csv(os.path.join(dataset,'last_update_times.csv'), index=False)
|
||||
logger.info('特征停更信息保存到文件:last_update_times.csv')
|
||||
|
||||
logger.info(f'删除预警的特征前数据量:{df.shape}')
|
||||
y_last_update_time = last_update_times['y']
|
||||
columns_to_drop = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time ]['feature'].values.tolist()
|
||||
df = df.drop(columns = columns_to_drop)
|
||||
|
||||
logger.info(f'删除预警的特征后数据量:{df.shape}')
|
||||
|
||||
logger.info(f'删除两月不更新特征前数据量:{df.shape}')
|
||||
# 去掉近最后数据对应的日期在六月以前的列,删除近2月的数据是常数的列
|
||||
current_date = datetime.datetime.now()
|
||||
two_months_ago = current_date - timedelta(days=180)
|
||||
|
||||
|
||||
def check_column(col_name):
|
||||
'''
|
||||
判断两月不更新指标
|
||||
去掉空值列
|
||||
去掉180天没有更新的列
|
||||
去掉常数值列
|
||||
|
||||
输入:列名
|
||||
输出:True or False
|
||||
'''
|
||||
@ -505,6 +517,7 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
||||
return False
|
||||
df_check_column = df[['ds',col_name]]
|
||||
df_check_column = df_check_column.dropna()
|
||||
|
||||
if len(df_check_column) == 0:
|
||||
print(f'空值列:{col_name}')
|
||||
return True
|
||||
@ -516,8 +529,13 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
||||
return corresponding_date < two_months_ago
|
||||
columns_to_drop = df.columns[df.columns.map(check_column)].tolist()
|
||||
df = df.drop(columns = columns_to_drop)
|
||||
|
||||
|
||||
|
||||
logger.info(f'删除两月不更新特征后数据量:{df.shape}')
|
||||
|
||||
|
||||
|
||||
# 删除预测列空值的行
|
||||
df = df.dropna(subset=['y'])
|
||||
logger.info(f'删除预测列为空值的行后数据量:{df.shape}')
|
||||
|
@ -141,11 +141,8 @@ def predict_main():
|
||||
|
||||
logger.info('制作报告ing')
|
||||
title = f'{settings}--{now}-预测报告' # 报告标题
|
||||
if 'Brent' in y:
|
||||
brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
|
||||
reportname=reportname,sqlitedb=sqlitedb),
|
||||
else:
|
||||
pp_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
|
||||
|
||||
brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
|
||||
reportname=reportname,sqlitedb=sqlitedb),
|
||||
|
||||
logger.info('制作报告end')
|
||||
|
Loading…
Reference in New Issue
Block a user