聚烯烃月度调试

This commit is contained in:
workpc 2025-08-12 14:07:48 +08:00
parent fd31036ab0
commit fc1c97e4bb
5 changed files with 96 additions and 61 deletions

View File

@ -270,9 +270,9 @@ ClassifyId = 1161
# "funcModule": "数据项",
# "funcOperation": "查询",
# "data": {
# "dateStart":"20200101",
# "dateEnd":"20241231",
# "dataItemNoList":["Brentzdj","Brentzgj"] # 数据项编码,代表 brent最低价和最高价
# "dateStart":"20150101",
# "dateEnd":"20301231",
# "dataItemNoList":["MAIN_CONFT_SETTLE_PRICE"] # 数据项编码(推测为主力合约结算价,待确认;原注释"brent最低价和最高价"对应的是旧编码 Brentzdj/Brentzgj)
# }
# }
@ -310,6 +310,10 @@ ClassifyId = 1161
# 'cisiyue': '原油大数据预测|FORECAST|PRICE|M_4',
# }
# # 报告中八大维度数据项重命名
# columnsrename = {'jxtppbdwdbz': '本周', 'jxtppbdwdcey': '次二月', 'jxtppbdwdcr': '次日', 'jxtppbdwdcsiy': '次四月',
# 'jxtppbdwdcsany': '次三月', 'jxtppbdwdcy': '次月', 'jxtppbdwdcz': '次周', 'jxtppbdwdgz': '隔周', }
# # 生产环境数据库
# host = 'rm-2zehj3r1n60ttz9x5.mysql.rds.aliyuncs.com'
@ -320,7 +324,7 @@ ClassifyId = 1161
# table_name = 'v_tbl_crude_oil_warning'
# # 变量定义--测试环境
# 变量定义--测试环境
server_host = '192.168.100.53:8080' # 内网
# server_host = '183.242.74.28' # 外网
login_pushreport_url = f"http://{server_host}/jingbo-dev/api/server/login"
@ -486,7 +490,7 @@ print("数据库连接成功", host, dbname, dbusername)
start_year = 2000 # 数据开始年份
end_time = '' # 数据截取日期
freq = 'M' # 时间频率,"D": 天 "W": 周"M": 月"Q": 季度"A": 年 "H": 小时 "T": 分钟 "S": 秒 "B": 工作日
delweekenday = True if freq == 'B' else False # 是否删除周末数据
delweekenday = True # 是否删除周末数据
is_corr = False # 特征是否参与滞后领先提升相关系数
add_kdj = False # 是否添加kdj指标
if add_kdj and is_edbnamelist:

View File

@ -458,13 +458,13 @@ DEFAULT_CONFIG = {
# 开关
is_train = True # 是否训练
is_debug = False # 是否调试
is_eta = True # 是否使用eta接口
is_eta = False # 是否使用eta接口
is_market = True # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效
is_timefurture = True # 是否使用时间特征
is_fivemodels = False # 是否使用之前保存的最佳的5个模型
is_edbcode = False # 特征使用edbcoding列表中的
is_edbnamelist = False # 自定义特征对应上面的edbnamelist
is_update_eta = False # 预测结果上传到eta
is_update_eta = True # 预测结果上传到eta
is_update_report = True # 是否上传报告
is_update_warning_data = False # 是否上传预警数据
is_update_predict_value = True # 是否上传预测值到市场信息平台

View File

@ -1079,7 +1079,8 @@ def zhoududatachuli(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_time
return df
def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_time='', y='y', dataset='dataset', delweekenday=False, add_kdj=False, is_timefurture=False):
def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_time='', y='y', dataset='dataset', delweekenday=True, add_kdj=False, is_timefurture=False):
'''
聚烯烃特征数据处理函数
接收的是两个df一个是指标数据一个是指标列表
@ -1138,32 +1139,13 @@ def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_t
if config.is_add_zt_price:
df = addztprice(df=df)
if config.freq == 'WW':
# 自定义周数据
# 按yearmonthweeks分组取均值得到新的数据
df = df.groupby(df['yearmonthweeks']).mean()
# 时间列转换为日期格式字符串
df['ds'] = df['ds'].dt.strftime('%Y-%m-%d')
elif config.freq == 'W':
# 按周取样
df = df.resample('W', on='ds').mean().reset_index()
elif config.freq == 'M':
# 按月取样
if 'yearmonthweeks' in df.columns:
df.drop('yearmonthweeks', axis=1, inplace=True)
df = df.resample('ME', on='ds').mean().reset_index()
# 删除预测列空值的行
# df = df.dropna(subset=['y'])
# config.logger.info(f'删除预测列为空值的行后数据量:{df.shape}')
# 删除y列0和''的行
config.logger.info(f'删除y列0和\'\'的行前数据量:{df.shape}')
df = df[df['y'] != 0.0]
config.logger.info(f'删除y列0和\'\'的行后数据量:{df.shape}')
df = df.dropna(axis=1, how='all')
config.logger.info(f'删除全为空值的列后数据量:{df.shape}')
df.to_csv(os.path.join(dataset, '未填充的特征数据.csv'), index=False)
# 去掉指标列表中的columns_to_drop的行
df_zhibiaoliebiao = df_zhibiaoliebiao[df_zhibiaoliebiao['指标名称'].isin(
df.columns.tolist())]
df_zhibiaoliebiao.to_csv(os.path.join(
dataset, '特征处理后的指标名称及分类.csv'), index=False)
# 频度分析
featurePindu(dataset=dataset)
# 向上填充
@ -1175,6 +1157,33 @@ def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_t
if delweekenday:
df = df[df['ds'].dt.weekday < 5]
if config.freq == 'WW':
# 自定义周数据
# 按yearmonthweeks分组取均值得到新的数据
df = df.groupby(df['yearmonthweeks']).mean()
# 时间列转换为日期格式字符串
df['ds'] = df['ds'].dt.strftime('%Y-%m-%d')
elif config.freq == 'W':
# 按周取样
df = df.resample('W', on='ds').mean(numeric_only=True).reset_index()
elif config.freq == 'M':
# 按月取样
if 'yearmonthweeks' in df.columns:
df.drop('yearmonthweeks', axis=1, inplace=True)
df = df.resample('ME', on='ds').mean().reset_index()
# 删除预测列空值的行
# df = df.dropna(subset=['y'])
# config.logger.info(f'删除预测列为空值的行后数据量:{df.shape}')
# 去掉指标列表中的columns_to_drop的行
df_zhibiaoliebiao = df_zhibiaoliebiao[df_zhibiaoliebiao['指标名称'].isin(
df.columns.tolist())]
df_zhibiaoliebiao.to_csv(os.path.join(
dataset, '特征处理后的指标名称及分类.csv'), index=False)
if add_kdj:
df = calculate_kdj(df)
@ -1325,6 +1334,7 @@ def getdata_juxiting(filename, datecol='date', y='y', dataset='', add_kdj=False,
df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol, y=y, dataset=dataset,
add_kdj=add_kdj, is_timefurture=is_timefurture, end_time=end_time)
return df, df_zhibiaoliebiao

View File

@ -478,14 +478,26 @@ def predict_main():
logger.info('模型训练完成')
logger.info('训练数据绘图ing')
model_results3 = model_losss_juxiting(
sqlitedb, end_time=global_config['end_time'], is_fivemodels=global_config['is_fivemodels'])
logger.info('训练数据绘图end')
try:
logger.info('训练数据绘图ing')
model_results3 = model_losss_juxiting(
sqlitedb, end_time=global_config['end_time'], is_fivemodels=global_config['is_fivemodels'])
logger.info('训练数据绘图end')
except Exception as e:
logger.info(f'训练数据绘图失败:{e}')
push_market_value()
try:
push_market_value()
logger.info('推送市场值完成')
except Exception as e:
logger.info(f'推送市场值失败:{e}')
try:
sql_inset_predict(global_config)
logger.info('插入预测数据完成')
except Exception as e:
logger.info(f'插入预测数据失败:{e}')
sql_inset_predict(global_config)
# 模型报告
# logger.info('制作报告ing')
@ -530,17 +542,17 @@ def predict_main():
if __name__ == '__main__':
# global end_time
# 遍历指定起止日期之间的工作日(freq='B'),逐日作为 end_time 运行预测
for i_time in pd.date_range('2025-7-24', '2025-8-11', freq='B'):
try:
global_config['end_time'] = i_time.strftime('%Y-%m-%d')
global_config['db_mysql'].connect()
predict_main()
except Exception as e:
logger.info(f'预测失败:{e}')
continue
# for i_time in pd.date_range('2025-7-24', '2025-8-12', freq='B'):
# try:
# global_config['end_time'] = i_time.strftime('%Y-%m-%d')
# global_config['db_mysql'].connect()
# predict_main()
# except Exception as e:
# logger.info(f'预测失败:{e}')
# continue
# global_config['end_time'] = '2025-08-05'
# predict_main()
# global_config['end_time'] = '2025-07-25'
predict_main()
# push_market_value()
# sql_inset_predict(global_config)

View File

@ -313,13 +313,13 @@ def predict_main():
df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False)
# 数据处理
df = zhoududatachuli(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture,
df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture,
end_time=end_time)
else:
# 读取数据
logger.info('读取本地数据:' + os.path.join(dataset, data_set))
df, df_zhibiaoliebiao = getdata_zhoudu_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
is_timefurture=is_timefurture, end_time=end_time) # 原始数据,未处理
# 更改预测列名称
@ -475,8 +475,17 @@ def predict_main():
# logger.info('制作报告end')
push_market_value()
sql_inset_predict(global_config)
try:
push_market_value()
logger.info('推送市场值完成')
except Exception as e:
logger.info(f'推送市场值失败:{e}')
try:
sql_inset_predict(global_config)
logger.info('插入预测数据完成')
except Exception as e:
logger.info(f'插入预测数据失败:{e}')
# # LSTM 单变量模型
# ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
@ -503,18 +512,18 @@ def predict_main():
if __name__ == '__main__':
# global end_time
# 遍历指定起止日期之间的工作日(freq='B'),逐日作为 end_time 运行预测
# for i_time in pd.date_range('2025-7-28', '2025-7-28', freq='B'):
# try:
# global_config['end_time'] = i_time.strftime('%Y-%m-%d')
# global_config['db_mysql'].connect()
# predict_main()
# except Exception as e:
# logger.info(f'预测失败:{e}')
# continue
for i_time in pd.date_range('2025-7-24', '2025-8-12', freq='B'):
try:
global_config['end_time'] = i_time.strftime('%Y-%m-%d')
global_config['db_mysql'].connect()
predict_main()
except Exception as e:
logger.info(f'预测失败:{e}')
continue
global_config['end_time'] = '2025-08-05'
predict_main()
# global_config['end_time'] = '2025-08-05'
# predict_main()
# push_market_value()