diff --git a/config_juxiting_yuedu.py b/config_juxiting_yuedu.py index 4f1d92f..41112df 100644 --- a/config_juxiting_yuedu.py +++ b/config_juxiting_yuedu.py @@ -270,9 +270,9 @@ ClassifyId = 1161 # "funcModule": "数据项", # "funcOperation": "查询", # "data": { -# "dateStart":"20200101", -# "dateEnd":"20241231", -# "dataItemNoList":["Brentzdj","Brentzgj"] # 数据项编码,代表 brent最低价和最高价 +# "dateStart":"20150101", +# "dateEnd":"20301231", +# "dataItemNoList":["MAIN_CONFT_SETTLE_PRICE"] # 数据项编码,代表 brent最低价和最高价 # } # } @@ -310,6 +310,10 @@ ClassifyId = 1161 # 'cisiyue': '原油大数据预测|FORECAST|PRICE|M_4', # } +# # 报告中八大维度数据项重命名 +# columnsrename = {'jxtppbdwdbz': '本周', 'jxtppbdwdcey': '次二月', 'jxtppbdwdcr': '次日', 'jxtppbdwdcsiy': '次四月', +# 'jxtppbdwdcsany': '次三月', 'jxtppbdwdcy': '次月', 'jxtppbdwdcz': '次周', 'jxtppbdwdgz': '隔周', } + # # 生产环境数据库 # host = 'rm-2zehj3r1n60ttz9x5.mysql.rds.aliyuncs.com' @@ -320,7 +324,7 @@ ClassifyId = 1161 # table_name = 'v_tbl_crude_oil_warning' -# # 变量定义--测试环境 +# 变量定义--测试环境 server_host = '192.168.100.53:8080' # 内网 # server_host = '183.242.74.28' # 外网 login_pushreport_url = f"http://{server_host}/jingbo-dev/api/server/login" @@ -486,7 +490,7 @@ print("数据库连接成功", host, dbname, dbusername) start_year = 2000 # 数据开始年份 end_time = '' # 数据截取日期 freq = 'M' # 时间频率,"D": 天 "W": 周"M": 月"Q": 季度"A": 年 "H": 小时 "T": 分钟 "S": 秒 "B": 工作日 -delweekenday = True if freq == 'B' else False # 是否删除周末数据 +delweekenday = True # 是否删除周末数据 is_corr = False # 特征是否参与滞后领先提升相关系数 add_kdj = False # 是否添加kdj指标 if add_kdj and is_edbnamelist: diff --git a/config_juxiting_zhoudu.py b/config_juxiting_zhoudu.py index 9c9204c..a1bb345 100644 --- a/config_juxiting_zhoudu.py +++ b/config_juxiting_zhoudu.py @@ -458,13 +458,13 @@ DEFAULT_CONFIG = { # 开关 is_train = True # 是否训练 is_debug = False # 是否调试 -is_eta = True # 是否使用eta接口 +is_eta = False # 是否使用eta接口 is_market = True # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 is_timefurture = True # 是否使用时间特征 is_fivemodels = False # 是否使用之前保存的最佳的5个模型 is_edbcode = False # 特征使用edbcoding列表中的 is_edbnamelist = False # 自定义特征,对应上面的edbnamelist -is_update_eta = False # 预测结果上传到eta +is_update_eta = True # 预测结果上传到eta is_update_report = True # 是否上传报告 is_update_warning_data = False # 是否上传预警数据 is_update_predict_value = True # 是否上传预测值到市场信息平台 diff --git a/lib/dataread.py b/lib/dataread.py index cb600fc..5d568c3 100644 --- a/lib/dataread.py +++ b/lib/dataread.py @@ -1079,7 +1079,8 @@ def zhoududatachuli(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_time return df -def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_time='', y='y', dataset='dataset', delweekenday=False, add_kdj=False, is_timefurture=False): +def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_time='', y='y', dataset='dataset', delweekenday=True, add_kdj=False, is_timefurture=False): + ''' 聚烯烃特征数据处理函数, 接收的是两个df,一个是指标数据,一个是指标列表 @@ -1138,32 +1139,13 @@ def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_t if config.is_add_zt_price: df = addztprice(df=df) - if config.freq == 'WW': - # 自定义周数据 - # 按weekofmothe分组取均值得到新的数据 - df = df.groupby(df['yearmonthweeks']).mean() - # 时间列转换为日期格式字符串 - df['ds'] = df['ds'].dt.strftime('%Y-%m-%d') - elif config.freq == 'W': - # 按周取样 - df = df.resample('W', on='ds').mean().reset_index() - elif config.freq == 'M': - # 按月取样 - if 'yearmonthweeks' in df.columns: - df.drop('yearmonthweeks', axis=1, inplace=True) - df = df.resample('ME', on='ds').mean().reset_index() - - # 删除预测列空值的行 - # df = df.dropna(subset=['y']) - # config.logger.info(f'删除预测列为空值的行后数据量:{df.shape}') + # 删除y列0和''的行 + config.logger.info(f'删除y列0和\'\'的行前数据量:{df.shape}') + df = df[df['y'] != 0.0] + config.logger.info(f'删除y列0和\'\'的行后数据量:{df.shape}') df = df.dropna(axis=1, how='all') config.logger.info(f'删除全为空值的列后数据量:{df.shape}') df.to_csv(os.path.join(dataset, '未填充的特征数据.csv'), index=False) - # 去掉指标列表中的columns_to_drop的行 - df_zhibiaoliebiao = df_zhibiaoliebiao[df_zhibiaoliebiao['指标名称'].isin( - df.columns.tolist())] - df_zhibiaoliebiao.to_csv(os.path.join( - dataset, '特征处理后的指标名称及分类.csv'), index=False) # 频度分析 featurePindu(dataset=dataset) # 向上填充 @@ -1175,6 +1157,33 @@ def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_t if delweekenday: df = df[df['ds'].dt.weekday < 5] + if config.freq == 'WW': + # 自定义周数据 + # 按weekofmothe分组取均值得到新的数据 + df = df.groupby(df['yearmonthweeks']).mean() + # 时间列转换为日期格式字符串 + df['ds'] = df['ds'].dt.strftime('%Y-%m-%d') + elif config.freq == 'W': + # 按周取样 + df = df.resample('W', on='ds').mean(numeric_only=True).reset_index() + elif config.freq == 'M': + # 按月取样 + if 'yearmonthweeks' in df.columns: + df.drop('yearmonthweeks', axis=1, inplace=True) + df = df.resample('ME', on='ds').mean().reset_index() + + # 删除预测列空值的行 + # df = df.dropna(subset=['y']) + # config.logger.info(f'删除预测列为空值的行后数据量:{df.shape}') + + + # 去掉指标列表中的columns_to_drop的行 + df_zhibiaoliebiao = df_zhibiaoliebiao[df_zhibiaoliebiao['指标名称'].isin( + df.columns.tolist())] + df_zhibiaoliebiao.to_csv(os.path.join( + dataset, '特征处理后的指标名称及分类.csv'), index=False) + + if add_kdj: df = calculate_kdj(df) @@ -1325,6 +1334,7 @@ def getdata_juxiting(filename, datecol='date', y='y', dataset='', add_kdj=False, df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol, y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, end_time=end_time) + return df, df_zhibiaoliebiao diff --git a/main_juxiting_yuedu.py b/main_juxiting_yuedu.py index 73f870e..697ce15 100644 --- a/main_juxiting_yuedu.py +++ b/main_juxiting_yuedu.py @@ -478,14 +478,26 @@ def predict_main(): logger.info('模型训练完成') - logger.info('训练数据绘图ing') - model_results3 = model_losss_juxiting( - sqlitedb, end_time=global_config['end_time'], is_fivemodels=global_config['is_fivemodels']) - logger.info('训练数据绘图end') + try: + logger.info('训练数据绘图ing') + model_results3 = model_losss_juxiting( + sqlitedb, end_time=global_config['end_time'], is_fivemodels=global_config['is_fivemodels']) + logger.info('训练数据绘图end') + except Exception as e: + logger.info(f'训练数据绘图失败:{e}') - push_market_value() + try: + push_market_value() + logger.info('推送市场值完成') + except Exception as e: + logger.info(f'推送市场值失败:{e}') + + try: + sql_inset_predict(global_config) + logger.info('插入预测数据完成') + except Exception as e: + logger.info(f'插入预测数据失败:{e}') - sql_inset_predict(global_config) # 模型报告 # logger.info('制作报告ing') @@ -530,17 +542,17 @@ def predict_main(): if __name__ == '__main__': # global end_time # 遍历2024-11-25 到 2024-12-3 之间的工作日日期 - for i_time in pd.date_range('2025-7-24', '2025-8-11', freq='B'): - try: - global_config['end_time'] = i_time.strftime('%Y-%m-%d') - global_config['db_mysql'].connect() - predict_main() - except Exception as e: - logger.info(f'预测失败:{e}') - continue + # for i_time in pd.date_range('2025-7-24', '2025-8-12', freq='B'): + # try: + # global_config['end_time'] = i_time.strftime('%Y-%m-%d') + # global_config['db_mysql'].connect() + # predict_main() + # except Exception as e: + # logger.info(f'预测失败:{e}') + # continue - # global_config['end_time'] = '2025-08-05' - # predict_main() + # global_config['end_time'] = '2025-07-25' + predict_main() # push_market_value() # sql_inset_predict(global_config) diff --git a/main_juxiting_zhoudu.py b/main_juxiting_zhoudu.py index 01c33d3..cc52296 100644 --- a/main_juxiting_zhoudu.py +++ b/main_juxiting_zhoudu.py @@ -313,13 +313,13 @@ def predict_main(): df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False) # 数据处理 - df = zhoududatachuli(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, + df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, end_time=end_time) else: # 读取数据 logger.info('读取本地数据:' + os.path.join(dataset, data_set)) - df, df_zhibiaoliebiao = getdata_zhoudu_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, + df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, end_time=end_time) # 原始数据,未处理 # 更改预测列名称 @@ -475,8 +475,17 @@ def predict_main(): # logger.info('制作报告end') - push_market_value() - sql_inset_predict(global_config) + try: + push_market_value() + logger.info('推送市场值完成') + except Exception as e: + logger.info(f'推送市场值失败:{e}') + + try: + sql_inset_predict(global_config) + logger.info('插入预测数据完成') + except Exception as e: + logger.info(f'插入预测数据失败:{e}') # # LSTM 单变量模型 # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset) @@ -503,18 +512,18 @@ def predict_main(): if __name__ == '__main__': # global end_time # 遍历2024-11-25 到 2024-12-3 之间的工作日日期 - # for i_time in pd.date_range('2025-7-28', '2025-7-28', freq='B'): - # try: - # global_config['end_time'] = i_time.strftime('%Y-%m-%d') - # global_config['db_mysql'].connect() - # predict_main() - # except Exception as e: - # logger.info(f'预测失败:{e}') - # continue + for i_time in pd.date_range('2025-7-24', '2025-8-12', freq='B'): + try: + global_config['end_time'] = i_time.strftime('%Y-%m-%d') + global_config['db_mysql'].connect() + predict_main() + except Exception as e: + logger.info(f'预测失败:{e}') + continue - global_config['end_time'] = '2025-08-05' - predict_main() + # global_config['end_time'] = '2025-08-05' + # predict_main() # push_market_value()