原油数据处理逻辑添加周度月度

This commit is contained in:
workpc 2024-11-22 17:31:41 +08:00
parent 9532f96507
commit 8b3c392cff
5 changed files with 10143 additions and 30 deletions

View File

@ -202,8 +202,10 @@ is_update_report = False # 是否上传报告
is_update_warning_data = False # 是否上传预警数据
# 数据截取日期
start_year = 2017 # 数据开始年份
end_time = '' # 数据截取日期
delweekenday = True
freq = 'M' # 时间频率,"D": 天 "W": 周"M": 月"Q": 季度"A": 年 "H": 小时 "T": 分钟 "S": 秒 "B": 工作日
delweekenday = True if freq == 'B' else False # 是否删除周末数据
is_corr = False # 特征是否参与滞后领先提升相关系数
add_kdj = False # 是否添加kdj指标
if add_kdj and is_edbnamelist:

View File

@ -566,20 +566,16 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
# 按时间顺序排列
df.sort_values(by='ds',inplace=True)
df['ds'] = pd.to_datetime(df['ds'])
# 获取2018年到指定日期的数据
df = df[df['ds'].dt.year >= 2018]
# 获取start_year年到end_time的数据
df = df[df['ds'].dt.year >= start_year]
df = df[df['ds'] <= end_time]
last_update_times_df,y_last_update_time = create_feature_last_update_time(df)
logger.info(f'删除预警的特征前数据量:{df.shape}')
columns_to_drop = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time ]['feature'].values.tolist()
df = df.drop(columns = columns_to_drop)
logger.info(f'删除预警的特征后数据量:{df.shape}')
if is_update_warning_data:
upload_warning_info(last_update_times_df,y_last_update_time)
# 去掉近最后数据对应的日期在六月以前的列删除近2月的数据是常熟的列
current_date = datetime.datetime.now()
two_months_ago = current_date - timedelta(days=180)
@ -591,6 +587,13 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
df = df.drop(columns=columns_to_drop)
logger.info(f'删除两月不更新特征后数据量:{df.shape}')
if freq == 'W':
# 按周取样
df = df.resample('W', on='ds').mean().reset_index()
elif freq == 'M':
# 按月取样
df = df.resample('M', on='ds').mean().reset_index()
# 删除预测列空值的行
df = df.dropna(subset=['y'])
logger.info(f'删除预测列为空值的行后数据量:{df.shape}')
@ -610,6 +613,7 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
# 删除周六日的数据
if delweekenday:
df = df[df['ds'].dt.weekday < 5]
# kdj指标
if add_kdj:
df = calculate_kdj(df)

10107
logs/pricepredict.log.1 Normal file

File diff suppressed because one or more lines are too long

View File

@ -149,32 +149,32 @@ def predict_main():
row, col = df.shape
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
# ex_Model(df,
# horizon=horizon,
# input_size=input_size,
# train_steps=train_steps,
# val_check_steps=val_check_steps,
# early_stop_patience_steps=early_stop_patience_steps,
# is_debug=is_debug,
# dataset=dataset,
# is_train=is_train,
# is_fivemodels=is_fivemodels,
# val_size=val_size,
# test_size=test_size,
# settings=settings,
# now=now,
# etadata=etadata,
# modelsindex=modelsindex,
# data=data,
# is_eta=is_eta,
# )
ex_Model(df,
horizon=horizon,
input_size=input_size,
train_steps=train_steps,
val_check_steps=val_check_steps,
early_stop_patience_steps=early_stop_patience_steps,
is_debug=is_debug,
dataset=dataset,
is_train=is_train,
is_fivemodels=is_fivemodels,
val_size=val_size,
test_size=test_size,
settings=settings,
now=now,
etadata=etadata,
modelsindex=modelsindex,
data=data,
is_eta=is_eta,
)
logger.info('模型训练完成')
logger.info('训练数据绘图ing')
# model_results3 = model_losss(sqlitedb)
model_results3 = model_losss_juxiting(sqlitedb)
model_results3 = model_losss(sqlitedb)
# model_results3 = model_losss_juxiting(sqlitedb)
logger.info('训练数据绘图end')
# 模型报告

View File

@ -103,7 +103,7 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien
df_reg = df_reg[-1000:-1]
# 计算训练集的结束索引占总数据的90%
split_index = int(0.8* len(df_reg))
split_index = int(0.9* len(df_reg))
# 按照时间顺序划分训练集和测试集
df_train = df_reg[:split_index]
@ -165,7 +165,7 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien
models.append(model)
# 创建NeuralForecast实例并训练模型
nf = NeuralForecast(models=models, freq="B")
nf = NeuralForecast(models=models, freq=freq)
from joblib import dump, load
if is_train: