原油数据处理逻辑添加周度月度
This commit is contained in:
parent
9532f96507
commit
8b3c392cff
@ -202,8 +202,10 @@ is_update_report = False # 是否上传报告
|
|||||||
is_update_warning_data = False # 是否上传预警数据
|
is_update_warning_data = False # 是否上传预警数据
|
||||||
|
|
||||||
# 数据截取日期
|
# 数据截取日期
|
||||||
|
start_year = 2017 # 数据开始年份
|
||||||
end_time = '' # 数据截取日期
|
end_time = '' # 数据截取日期
|
||||||
delweekenday = True
|
freq = 'M' # 时间频率,"D": 天 "W": 周"M": 月"Q": 季度"A": 年 "H": 小时 "T": 分钟 "S": 秒 "B": 工作日
|
||||||
|
delweekenday = True if freq == 'B' else False # 是否删除周末数据
|
||||||
is_corr = False # 特征是否参与滞后领先提升相关系数
|
is_corr = False # 特征是否参与滞后领先提升相关系数
|
||||||
add_kdj = False # 是否添加kdj指标
|
add_kdj = False # 是否添加kdj指标
|
||||||
if add_kdj and is_edbnamelist:
|
if add_kdj and is_edbnamelist:
|
||||||
|
@ -566,20 +566,16 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
|||||||
# 按时间顺序排列
|
# 按时间顺序排列
|
||||||
df.sort_values(by='ds',inplace=True)
|
df.sort_values(by='ds',inplace=True)
|
||||||
df['ds'] = pd.to_datetime(df['ds'])
|
df['ds'] = pd.to_datetime(df['ds'])
|
||||||
# 获取2018年到指定日期的数据
|
# 获取start_year年到end_time的数据
|
||||||
df = df[df['ds'].dt.year >= 2018]
|
df = df[df['ds'].dt.year >= start_year]
|
||||||
df = df[df['ds'] <= end_time]
|
df = df[df['ds'] <= end_time]
|
||||||
|
|
||||||
last_update_times_df,y_last_update_time = create_feature_last_update_time(df)
|
last_update_times_df,y_last_update_time = create_feature_last_update_time(df)
|
||||||
logger.info(f'删除预警的特征前数据量:{df.shape}')
|
logger.info(f'删除预警的特征前数据量:{df.shape}')
|
||||||
|
|
||||||
columns_to_drop = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time ]['feature'].values.tolist()
|
columns_to_drop = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time ]['feature'].values.tolist()
|
||||||
df = df.drop(columns = columns_to_drop)
|
df = df.drop(columns = columns_to_drop)
|
||||||
|
|
||||||
logger.info(f'删除预警的特征后数据量:{df.shape}')
|
logger.info(f'删除预警的特征后数据量:{df.shape}')
|
||||||
if is_update_warning_data:
|
if is_update_warning_data:
|
||||||
upload_warning_info(last_update_times_df,y_last_update_time)
|
upload_warning_info(last_update_times_df,y_last_update_time)
|
||||||
|
|
||||||
# 去掉近最后数据对应的日期在六月以前的列,删除近2月的数据是常熟的列
|
# 去掉近最后数据对应的日期在六月以前的列,删除近2月的数据是常熟的列
|
||||||
current_date = datetime.datetime.now()
|
current_date = datetime.datetime.now()
|
||||||
two_months_ago = current_date - timedelta(days=180)
|
two_months_ago = current_date - timedelta(days=180)
|
||||||
@ -591,6 +587,13 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
|||||||
df = df.drop(columns=columns_to_drop)
|
df = df.drop(columns=columns_to_drop)
|
||||||
|
|
||||||
logger.info(f'删除两月不更新特征后数据量:{df.shape}')
|
logger.info(f'删除两月不更新特征后数据量:{df.shape}')
|
||||||
|
|
||||||
|
if freq == 'W':
|
||||||
|
# 按周取样
|
||||||
|
df = df.resample('W', on='ds').mean().reset_index()
|
||||||
|
elif freq == 'M':
|
||||||
|
# 按月取样
|
||||||
|
df = df.resample('M', on='ds').mean().reset_index()
|
||||||
# 删除预测列空值的行
|
# 删除预测列空值的行
|
||||||
df = df.dropna(subset=['y'])
|
df = df.dropna(subset=['y'])
|
||||||
logger.info(f'删除预测列为空值的行后数据量:{df.shape}')
|
logger.info(f'删除预测列为空值的行后数据量:{df.shape}')
|
||||||
@ -610,6 +613,7 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
|||||||
# 删除周六日的数据
|
# 删除周六日的数据
|
||||||
if delweekenday:
|
if delweekenday:
|
||||||
df = df[df['ds'].dt.weekday < 5]
|
df = df[df['ds'].dt.weekday < 5]
|
||||||
|
|
||||||
# kdj指标
|
# kdj指标
|
||||||
if add_kdj:
|
if add_kdj:
|
||||||
df = calculate_kdj(df)
|
df = calculate_kdj(df)
|
||||||
|
10107
logs/pricepredict.log.1
Normal file
10107
logs/pricepredict.log.1
Normal file
File diff suppressed because one or more lines are too long
@ -149,32 +149,32 @@ def predict_main():
|
|||||||
row, col = df.shape
|
row, col = df.shape
|
||||||
|
|
||||||
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
|
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
|
||||||
# ex_Model(df,
|
ex_Model(df,
|
||||||
# horizon=horizon,
|
horizon=horizon,
|
||||||
# input_size=input_size,
|
input_size=input_size,
|
||||||
# train_steps=train_steps,
|
train_steps=train_steps,
|
||||||
# val_check_steps=val_check_steps,
|
val_check_steps=val_check_steps,
|
||||||
# early_stop_patience_steps=early_stop_patience_steps,
|
early_stop_patience_steps=early_stop_patience_steps,
|
||||||
# is_debug=is_debug,
|
is_debug=is_debug,
|
||||||
# dataset=dataset,
|
dataset=dataset,
|
||||||
# is_train=is_train,
|
is_train=is_train,
|
||||||
# is_fivemodels=is_fivemodels,
|
is_fivemodels=is_fivemodels,
|
||||||
# val_size=val_size,
|
val_size=val_size,
|
||||||
# test_size=test_size,
|
test_size=test_size,
|
||||||
# settings=settings,
|
settings=settings,
|
||||||
# now=now,
|
now=now,
|
||||||
# etadata=etadata,
|
etadata=etadata,
|
||||||
# modelsindex=modelsindex,
|
modelsindex=modelsindex,
|
||||||
# data=data,
|
data=data,
|
||||||
# is_eta=is_eta,
|
is_eta=is_eta,
|
||||||
# )
|
)
|
||||||
|
|
||||||
|
|
||||||
logger.info('模型训练完成')
|
logger.info('模型训练完成')
|
||||||
|
|
||||||
logger.info('训练数据绘图ing')
|
logger.info('训练数据绘图ing')
|
||||||
# model_results3 = model_losss(sqlitedb)
|
model_results3 = model_losss(sqlitedb)
|
||||||
model_results3 = model_losss_juxiting(sqlitedb)
|
# model_results3 = model_losss_juxiting(sqlitedb)
|
||||||
logger.info('训练数据绘图end')
|
logger.info('训练数据绘图end')
|
||||||
|
|
||||||
# 模型报告
|
# 模型报告
|
||||||
|
@ -103,7 +103,7 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien
|
|||||||
df_reg = df_reg[-1000:-1]
|
df_reg = df_reg[-1000:-1]
|
||||||
|
|
||||||
# 计算训练集的结束索引,占总数据的90%
|
# 计算训练集的结束索引,占总数据的90%
|
||||||
split_index = int(0.8* len(df_reg))
|
split_index = int(0.9* len(df_reg))
|
||||||
|
|
||||||
# 按照时间顺序划分训练集和测试集
|
# 按照时间顺序划分训练集和测试集
|
||||||
df_train = df_reg[:split_index]
|
df_train = df_reg[:split_index]
|
||||||
@ -165,7 +165,7 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien
|
|||||||
models.append(model)
|
models.append(model)
|
||||||
|
|
||||||
# 创建NeuralForecast实例并训练模型
|
# 创建NeuralForecast实例并训练模型
|
||||||
nf = NeuralForecast(models=models, freq="B")
|
nf = NeuralForecast(models=models, freq=freq)
|
||||||
|
|
||||||
from joblib import dump, load
|
from joblib import dump, load
|
||||||
if is_train:
|
if is_train:
|
||||||
|
Loading…
Reference in New Issue
Block a user