diff --git a/config_jingbo.py b/config_jingbo.py index 1c9f779..4c4b0fb 100644 --- a/config_jingbo.py +++ b/config_jingbo.py @@ -168,7 +168,7 @@ upload_data = { ### 开关 is_train = True # 是否训练 is_debug = False # 是否调试 -is_eta = True # 是否使用eta接口 +is_eta = False # 是否使用eta接口 is_timefurture = True # 是否使用时间特征 is_fivemodels = False # 是否使用之前保存的最佳的5个模型 is_edbcode = False # 特征使用edbcoding列表中的 diff --git a/dataset/jbsh_yuanyou.db b/dataset/jbsh_yuanyou.db index 89a4b1a..0daf5e6 100644 Binary files a/dataset/jbsh_yuanyou.db and b/dataset/jbsh_yuanyou.db differ diff --git a/lib/dataread.py b/lib/dataread.py index 863be41..ec93b3c 100644 --- a/lib/dataread.py +++ b/lib/dataread.py @@ -463,6 +463,8 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y' # 保存每列的最后更新时间到文件 last_update_times_df = pd.DataFrame(columns = ['feature', 'last_update_time','is_value','update_period','warning_date']) + + # 打印每列的最后更新时间 for column, last_update_time in last_update_times.items(): values = [] @@ -476,7 +478,7 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y' # 计算预警日期 time_diff = (df1[column].dropna().index.to_series().diff().mode()[0]).total_seconds() / 3600 / 24 from datetime import timedelta - early_warning_date = datetime.datetime.strptime(last_update_time, '%Y-%m-%d') + timedelta(days=time_diff)*2 + early_warning_date = datetime.datetime.strptime(last_update_time, '%Y-%m-%d') + timedelta(days=time_diff)*2 + timedelta(days=1) early_warning_date = early_warning_date.strftime('%Y-%m-%d') except KeyError: time_diff = 0 @@ -489,15 +491,25 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y' last_update_times_df.to_csv(os.path.join(dataset,'last_update_times.csv'), index=False) logger.info('特征停更信息保存到文件:last_update_times.csv') + logger.info(f'删除预警的特征前数据量:{df.shape}') + y_last_update_time = last_update_times['y'] + columns_to_drop = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time ]['feature'].values.tolist() + df = df.drop(columns = columns_to_drop) + + logger.info(f'删除预警的特征后数据量:{df.shape}') logger.info(f'删除两月不更新特征前数据量:{df.shape}') # 去掉近最后数据对应的日期在六月以前的列,删除近2月的数据是常熟的列 current_date = datetime.datetime.now() two_months_ago = current_date - timedelta(days=180) + def check_column(col_name): ''' - 判断两月不更新指标 + 去掉空值列 + 去掉180天没有更新的列 + 去掉常数值列 + 输入:列名 输出:True or False ''' @@ -505,6 +517,7 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y' return False df_check_column = df[['ds',col_name]] df_check_column = df_check_column.dropna() + if len(df_check_column) == 0: print(f'空值列:{col_name}') return True @@ -516,8 +529,13 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y' return corresponding_date < two_months_ago columns_to_drop = df.columns[df.columns.map(check_column)].tolist() df = df.drop(columns = columns_to_drop) + + + logger.info(f'删除两月不更新特征后数据量:{df.shape}') + + # 删除预测列空值的行 df = df.dropna(subset=['y']) logger.info(f'删除预测列为空值的行后数据量:{df.shape}') diff --git a/main_yuanyou.py b/main_yuanyou.py index 86bfb98..51afe07 100644 --- a/main_yuanyou.py +++ b/main_yuanyou.py @@ -141,11 +141,8 @@ def predict_main(): logger.info('制作报告ing') title = f'{settings}--{now}-预测报告' # 报告标题 - if 'Brent' in y: - brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time, - reportname=reportname,sqlitedb=sqlitedb), - else: - pp_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time, + + brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time, reportname=reportname,sqlitedb=sqlitedb), logger.info('制作报告end')