原油数据处理逻辑添加周度月度

This commit is contained in:
workpc 2024-11-22 17:31:41 +08:00
parent 9532f96507
commit 8b3c392cff
5 changed files with 10143 additions and 30 deletions

View File

@ -202,8 +202,10 @@ is_update_report = False # 是否上传报告
is_update_warning_data = False # 是否上传预警数据 is_update_warning_data = False # 是否上传预警数据
# 数据截取日期 # 数据截取日期
start_year = 2017 # 数据开始年份
end_time = '' # 数据截取日期 end_time = '' # 数据截取日期
delweekenday = True freq = 'M' # 时间频率,"D": 天 "W": 周"M": 月"Q": 季度"A": 年 "H": 小时 "T": 分钟 "S": 秒 "B": 工作日
delweekenday = True if freq == 'B' else False # 是否删除周末数据
is_corr = False # 特征是否参与滞后领先提升相关系数 is_corr = False # 特征是否参与滞后领先提升相关系数
add_kdj = False # 是否添加kdj指标 add_kdj = False # 是否添加kdj指标
if add_kdj and is_edbnamelist: if add_kdj and is_edbnamelist:

View File

@ -566,20 +566,16 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
# 按时间顺序排列 # 按时间顺序排列
df.sort_values(by='ds',inplace=True) df.sort_values(by='ds',inplace=True)
df['ds'] = pd.to_datetime(df['ds']) df['ds'] = pd.to_datetime(df['ds'])
# 获取2018年到指定日期的数据 # 获取start_year年到end_time的数据
df = df[df['ds'].dt.year >= 2018] df = df[df['ds'].dt.year >= start_year]
df = df[df['ds'] <= end_time] df = df[df['ds'] <= end_time]
last_update_times_df,y_last_update_time = create_feature_last_update_time(df) last_update_times_df,y_last_update_time = create_feature_last_update_time(df)
logger.info(f'删除预警的特征前数据量:{df.shape}') logger.info(f'删除预警的特征前数据量:{df.shape}')
columns_to_drop = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time ]['feature'].values.tolist() columns_to_drop = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time ]['feature'].values.tolist()
df = df.drop(columns = columns_to_drop) df = df.drop(columns = columns_to_drop)
logger.info(f'删除预警的特征后数据量:{df.shape}') logger.info(f'删除预警的特征后数据量:{df.shape}')
if is_update_warning_data: if is_update_warning_data:
upload_warning_info(last_update_times_df,y_last_update_time) upload_warning_info(last_update_times_df,y_last_update_time)
# 去掉近最后数据对应的日期在六月以前的列删除近2月的数据是常熟的列 # 去掉近最后数据对应的日期在六月以前的列删除近2月的数据是常熟的列
current_date = datetime.datetime.now() current_date = datetime.datetime.now()
two_months_ago = current_date - timedelta(days=180) two_months_ago = current_date - timedelta(days=180)
@ -591,6 +587,13 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
df = df.drop(columns=columns_to_drop) df = df.drop(columns=columns_to_drop)
logger.info(f'删除两月不更新特征后数据量:{df.shape}') logger.info(f'删除两月不更新特征后数据量:{df.shape}')
if freq == 'W':
# 按周取样
df = df.resample('W', on='ds').mean().reset_index()
elif freq == 'M':
# 按月取样
df = df.resample('M', on='ds').mean().reset_index()
# 删除预测列空值的行 # 删除预测列空值的行
df = df.dropna(subset=['y']) df = df.dropna(subset=['y'])
logger.info(f'删除预测列为空值的行后数据量:{df.shape}') logger.info(f'删除预测列为空值的行后数据量:{df.shape}')
@ -610,6 +613,7 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
# 删除周六日的数据 # 删除周六日的数据
if delweekenday: if delweekenday:
df = df[df['ds'].dt.weekday < 5] df = df[df['ds'].dt.weekday < 5]
# kdj指标 # kdj指标
if add_kdj: if add_kdj:
df = calculate_kdj(df) df = calculate_kdj(df)

10107
logs/pricepredict.log.1 Normal file

File diff suppressed because one or more lines are too long

View File

@ -149,32 +149,32 @@ def predict_main():
row, col = df.shape row, col = df.shape
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
# ex_Model(df, ex_Model(df,
# horizon=horizon, horizon=horizon,
# input_size=input_size, input_size=input_size,
# train_steps=train_steps, train_steps=train_steps,
# val_check_steps=val_check_steps, val_check_steps=val_check_steps,
# early_stop_patience_steps=early_stop_patience_steps, early_stop_patience_steps=early_stop_patience_steps,
# is_debug=is_debug, is_debug=is_debug,
# dataset=dataset, dataset=dataset,
# is_train=is_train, is_train=is_train,
# is_fivemodels=is_fivemodels, is_fivemodels=is_fivemodels,
# val_size=val_size, val_size=val_size,
# test_size=test_size, test_size=test_size,
# settings=settings, settings=settings,
# now=now, now=now,
# etadata=etadata, etadata=etadata,
# modelsindex=modelsindex, modelsindex=modelsindex,
# data=data, data=data,
# is_eta=is_eta, is_eta=is_eta,
# ) )
logger.info('模型训练完成') logger.info('模型训练完成')
logger.info('训练数据绘图ing') logger.info('训练数据绘图ing')
# model_results3 = model_losss(sqlitedb) model_results3 = model_losss(sqlitedb)
model_results3 = model_losss_juxiting(sqlitedb) # model_results3 = model_losss_juxiting(sqlitedb)
logger.info('训练数据绘图end') logger.info('训练数据绘图end')
# 模型报告 # 模型报告

View File

@ -103,7 +103,7 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien
df_reg = df_reg[-1000:-1] df_reg = df_reg[-1000:-1]
# 计算训练集的结束索引占总数据的90% # 计算训练集的结束索引占总数据的90%
split_index = int(0.8* len(df_reg)) split_index = int(0.9* len(df_reg))
# 按照时间顺序划分训练集和测试集 # 按照时间顺序划分训练集和测试集
df_train = df_reg[:split_index] df_train = df_reg[:split_index]
@ -165,7 +165,7 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien
models.append(model) models.append(model)
# 创建NeuralForecast实例并训练模型 # 创建NeuralForecast实例并训练模型
nf = NeuralForecast(models=models, freq="B") nf = NeuralForecast(models=models, freq=freq)
from joblib import dump, load from joblib import dump, load
if is_train: if is_train: