Compare commits
No commits in common. "52113fb5e519bbbf0268cd22ec28eff36082320f" and "280bbab3499137b28dd81060172876ad5c780d44" have entirely different histories.
52113fb5e5...280bbab349
@@ -159,7 +159,7 @@ warning_data = {
     "funcModule":'原油特征停更预警',
     "funcOperation":'原油特征停更预警',
     "data":{
-        'WARNING_TYPE_NAME':'特征数据停更预警test',
+        'WARNING_TYPE_NAME':'日度数据',
         'WARNING_CONTENT':'',
         'WARNING_DATE':''
     }
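The two empty fields in this payload are filled in at runtime by datachuli() before upload (see the lib/dataread.py hunks below). A minimal, self-contained sketch of that flow, using only the keys visible in this hunk (any keys of warning_data outside the hunk are omitted, and the sample values are illustrative):

    import json

    warning_data = {
        "funcModule": '原油特征停更预警',
        "funcOperation": '原油特征停更预警',
        "data": {
            'WARNING_TYPE_NAME': '日度数据',
            'WARNING_CONTENT': '',  # populated with the per-feature warning report
            'WARNING_DATE': '',     # populated with y's warning date
        },
    }

    # datachuli() later assigns, for example:
    warning_data['data']['WARNING_DATE'] = '2024-11-01'
    warning_data['data']['WARNING_CONTENT'] = '没有需要维护的特征指标'
    print(json.dumps(warning_data, ensure_ascii=False, indent=2))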
@@ -181,14 +181,14 @@ warning_data = {
 ### switches
 is_train = True # whether to train
 is_debug = False # whether to run in debug mode
-is_eta = True # whether to use the eta API
+is_eta = False # whether to use the eta API
 is_timefurture = True # whether to use time-derived features
 is_fivemodels = False # whether to reuse the 5 best previously saved models
 is_edbcode = False # use the features in the edbcoding list
 is_edbnamelist = False # custom features, matching edbnamelist above
 is_update_eta = False # upload predictions to eta
 is_update_report = False # whether to upload the report
-is_update_warning_data = True # whether to upload warning data
+is_update_warning_data = False # whether to upload warning data
 
 # data cutoff date
 end_time = '' # data cutoff date
Binary file not shown.

lib/dataread.py (197 changed lines)
@@ -12,7 +12,6 @@ import os
 import hmac
 import hashlib
 import json
-import math
 import torch
 torch.set_float32_matmul_precision("high")
 import matplotlib.pyplot as plt
@@ -115,8 +114,7 @@ def upload_report_data(token, upload_data):
         logger.info("报告上传失败")
         return None
 
-def upload_warning_data(warning_data):
-    token = get_head_auth_report()
+def upload_warning_data(token, warning_data):
     warning_data = warning_data
     headers = {"Authorization": token}
     logger.info("预警上传中...")
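upload_warning_data no longer fetches its own token; the caller obtains one and passes it in, so a single token can be reused across report and warning uploads. A runnable sketch of the new call pattern; the URL and the stubbed auth helper below are placeholders, since this diff does not show them:

    import requests

    upload_warning_url = "https://example.com/api/warning"  # placeholder; the real URL is defined elsewhere

    def get_head_auth_report():
        # Placeholder for the real auth call; this diff does not show its body.
        return "Bearer <token>"

    def upload_warning_data(token, warning_data):
        # New signature: the caller supplies the token.
        headers = {"Authorization": token}
        resp = requests.post(upload_warning_url, headers=headers, json=warning_data, timeout=30)
        return resp if resp.status_code == 200 else None

    token = get_head_auth_report()
    upload_warning_data(token, {"data": {"WARNING_CONTENT": "test"}})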
@@ -130,80 +128,6 @@ def upload_warning_data(warning_data):
         return None
 
 
-def upload_warning_info(last_update_times_df,y_last_update_time):
-    logger.info(f'上传预警信息')
-    try:
-        warning_data_df = last_update_times_df[last_update_times_df['warning_date']<y_last_update_time][['stop_update_period','warning_date','last_update_time','update_period','feature']]
-        warning_data_df.columns = ['停更周期','预警日期','最后更新时间','更新周期','特征名称']
-        if len(warning_data_df) > 0:
-            content = '原油特征指标预警信息:\n\n'
-            warning_data_df = warning_data_df.sort_values(by='停更周期',ascending=False)
-            fixed_length = 30
-            warning_data_df['特征名称'] = warning_data_df['特征名称'].str.replace(" ", "")
-            content = warning_data_df.to_string(index=False, col_space=fixed_length)
-
-        else:
-            logger.info(f'没有需要上传的预警信息')
-            content = '没有需要维护的特征指标'
-        warning_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-        warning_data['data']['WARNING_DATE'] = warning_date
-        warning_data['data']['WARNING_CONTENT'] = content
-
-        upload_warning_data(warning_data)
-        logger.info(f'上传预警信息成功')
-    except Exception as e:
-        logger.error(f'上传预警信息失败:{e}')
-
-
-def create_feature_last_update_time(df):
-    """
-    Computes the feature stop-update info.
-    Args:
-        df (DataFrame): DataFrame containing the feature data
-    Returns:
-        DataFrame: DataFrame with the stop-update info per feature
-        str: last update time of the y column
-    """
-    df1 = df.copy()
-    # find each column's last update time
-    df1.set_index('ds', inplace=True)
-    last_update_times = df1.apply(lambda x: x.dropna().index.max().strftime('%Y-%m-%d') if not x.dropna().empty else None)
-
-    # save each column's last update time to a file
-    last_update_times_df = pd.DataFrame(columns = ['feature', 'last_update_time','is_value','update_period','warning_date','stop_update_period'])
-
-    # log each column's last update time
-    for column, last_update_time in last_update_times.items():
-        values = []
-        # check whether the column is constant
-        if df1[column].tail(20).nunique() == 1:
-            values = values + [column, last_update_time,1]
-        else:
-            values = values + [column, last_update_time,0]
-        # compute the time gap between feature observations
-        try:
-            # compute the warning date
-            time_diff = (df1[column].dropna().index.to_series().diff().mode()[0]).total_seconds() / 3600 / 24
-            from datetime import timedelta
-            last_update_time_datetime = datetime.datetime.strptime(last_update_time, '%Y-%m-%d')
-            last_update_date = end_time if end_time != '' else datetime.datetime.now().strftime('%Y-%m-%d')
-            end_time_datetime = datetime.datetime.strptime(last_update_date, '%Y-%m-%d')
-            early_warning_date = last_update_time_datetime + timedelta(days=time_diff)*2 + timedelta(days=1)
-            stop_update_period = int(math.ceil((end_time_datetime-last_update_time_datetime).days / time_diff))
-            early_warning_date = early_warning_date.strftime('%Y-%m-%d')
-        except KeyError:
-            time_diff = 0
-            early_warning_date = end_time
-        values = values + [time_diff,early_warning_date,stop_update_period]
-        last_update_times_df.loc[len(last_update_times_df)] = values
-
-        logger.info(f"Column {column} was last updated at {last_update_time}")
-    y_last_update_time = last_update_times_df[last_update_times_df['feature']=='y']['warning_date'].values[0]
-    last_update_times_df.to_csv(os.path.join(dataset,'last_update_times.csv'), index=False)
-    logger.info('特征停更信息保存到文件:last_update_times.csv')
-    return last_update_times_df,y_last_update_time
-
-
 # feature frequency statistics
 def featurePindu(dataset):
     # read the file
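Two things worth noting about the deleted helper. It inferred each feature's update period as the modal gap between consecutive observations, then set the warning date at two periods plus one day after the last observation. Also, its except KeyError branch never assigned stop_update_period, so that path either reused the previous column's stale value or raised a NameError on the first column. A self-contained sketch of the period and warning-date rule on toy data (not the project's data):

    import pandas as pd
    from datetime import timedelta

    # Toy daily series standing in for one feature column indexed by 'ds'.
    idx = pd.to_datetime(['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04'])
    s = pd.Series([1.0, 2.0, 3.0, 4.0], index=idx)

    # Update period = most common gap between consecutive observations, in days.
    time_diff = s.dropna().index.to_series().diff().mode()[0].total_seconds() / 3600 / 24

    # Warn once 2 periods + 1 day have elapsed since the last observation.
    last_update = s.dropna().index.max()
    early_warning_date = last_update + timedelta(days=time_diff) * 2 + timedelta(days=1)
    print(time_diff, early_warning_date.strftime('%Y-%m-%d'))  # 1.0 2024-01-07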
@@ -521,30 +445,7 @@ def calculate_kdj(data, n=9):
     # data = data.dropna()
     return data
 
-def check_column(df,col_name,two_months_ago):
-    '''
-    Check whether a column should be dropped.
-    Flags columns that are all null, have not updated in 180 days, or are constant.
-    Args:
-        col_name (str): column name.
-        df (DataFrame): DataFrame containing the column.
-    Returns:
-        bool: True if the column should be dropped, otherwise False.
-    '''
-    if 'ds' in col_name or 'y' in col_name:
-        return False
-    df_check_column = df[['ds',col_name]]
-    df_check_column = df_check_column.dropna()
-
-    if len(df_check_column) == 0:
-        print(f'空值列:{col_name}')
-        return True
-    # check whether the column is constant
-    if df_check_column[(df_check_column['ds']>= two_months_ago)].groupby(col_name).ngroups < 2:
-        print(f'180没有更新:{col_name}')
-        return True
-    corresponding_date = df_check_column.iloc[-1]['ds']
-    return corresponding_date < two_months_ago
-
 def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y',dataset='dataset',delweekenday=False,add_kdj=False,is_timefurture=False):
     '''
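The ngroups < 2 test above (kept in the inlined copy below) flags a column as stale when it takes a single distinct value over the recent window. A toy illustration of why grouping by the value column works as a constancy check:

    import pandas as pd

    df = pd.DataFrame({
        'ds': pd.date_range('2024-01-01', periods=6, freq='D'),
        'flat': [5, 5, 5, 5, 5, 5],   # constant -> 1 group
        'live': [1, 2, 2, 3, 4, 5],   # varies   -> several groups
    })
    cutoff = pd.Timestamp('2024-01-03')
    recent = df[df['ds'] >= cutoff]
    print(recent.groupby('flat').ngroups)  # 1  -> would be dropped
    print(recent.groupby('live').ngroups)  # >1 -> kept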
@@ -566,27 +467,107 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
     df = df[df['ds'].dt.year >= 2018]
     df = df[df['ds'] <= end_time]
 
-    last_update_times_df,y_last_update_time = create_feature_last_update_time(df)
-    logger.info(f'删除预警的特征前数据量:{df.shape}')
+    df1 = df.copy() # used to compute feature stop-update info
+    # find each column's last update time
+    df1.set_index('ds', inplace=True)
+    last_update_times = df1.apply(lambda x: x.dropna().index.max().strftime('%Y-%m-%d') if not x.dropna().empty else None)
+
+    # save each column's last update time to a file
+    last_update_times_df = pd.DataFrame(columns = ['feature', 'last_update_time','is_value','update_period','warning_date'])
+
+
+    # log each column's last update time
+    for column, last_update_time in last_update_times.items():
+        values = []
+        # check whether the column is constant
+        if df1[column].tail(20).nunique() == 1:
+            values = values + [column, last_update_time,1]
+        else:
+            values = values + [column, last_update_time,0]
+        # compute the time gap between feature observations
+        try:
+            # compute the warning date
+            time_diff = (df1[column].dropna().index.to_series().diff().mode()[0]).total_seconds() / 3600 / 24
+            from datetime import timedelta
+            early_warning_date = datetime.datetime.strptime(last_update_time, '%Y-%m-%d') + timedelta(days=time_diff)*2 + timedelta(days=1)
+            early_warning_date = early_warning_date.strftime('%Y-%m-%d')
+        except KeyError:
+            time_diff = 0
+            early_warning_date = end_time
+        values = values + [time_diff,early_warning_date]
+        last_update_times_df.loc[len(last_update_times_df)] = values
+
+        logger.info(f"Column {column} was last updated at {last_update_time}")
+
+    last_update_times_df.to_csv(os.path.join(dataset,'last_update_times.csv'), index=False)
+    logger.info('特征停更信息保存到文件:last_update_times.csv')
+
+    logger.info(f'删除预警的特征前数据量:{df.shape}')
+    y_last_update_time = last_update_times_df[last_update_times_df['feature']=='y']['warning_date'].values[0]
     columns_to_drop = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time ]['feature'].values.tolist()
     df = df.drop(columns = columns_to_drop)
 
     logger.info(f'删除预警的特征后数据量:{df.shape}')
 
 
     if is_update_warning_data:
-        upload_warning_info(last_update_times_df,y_last_update_time)
+        logger.info(f'上传预警信息')
+        try:
+            warning_data_df = last_update_times_df[last_update_times_df['warning_date']<y_last_update_time][['last_update_time','feature']]
+            if len(warning_data_df) > 0:
+                content = '原油特征指标预警信息:\n\n'
+                warning_data_df = warning_data_df.sort_values(by='last_update_time',ascending=False)
+                for ds, df in warning_data_df.groupby('last_update_time'):
+                    content += f'{ds} \n {df["feature"].to_string(index=False).replace(" ", "")}\n\n'
+            else:
+                logger.info(f'没有需要上传的预警信息')
+                content = '没有需要维护的特征指标'
+            warning_data['data']['WARNING_DATE'] = y_last_update_time
+            warning_data['data']['WARNING_CONTENT'] = content
+            token = get_head_auth_report()
+            upload_warning_data(token, warning_data)
+            logger.info(f'上传预警信息成功')
+        except Exception as e:
+            logger.error(f'上传预警信息失败:{e}')
+
     # drop columns whose latest data date is more than six months old, and columns that are constant over the last 2 months
     current_date = datetime.datetime.now()
     two_months_ago = current_date - timedelta(days=180)
 
     logger.info(f'删除两月不更新特征前数据量:{df.shape}')
-    columns_to_drop = []
-    for clo in df.columns:
-        if check_column(df,clo,two_months_ago):
-            columns_to_drop.append(clo)
-    df = df.drop(columns=columns_to_drop)
+    def check_column(col_name):
+        '''
+        drop all-null columns
+        drop columns not updated in 180 days
+        drop constant-value columns
+
+        input: column name
+        output: True or False
+        '''
+        if 'ds' in col_name or 'y' in col_name:
+            return False
+        df_check_column = df[['ds',col_name]]
+        df_check_column = df_check_column.dropna()
+
+        if len(df_check_column) == 0:
+            print(f'空值列:{col_name}')
+            return True
+        # check whether the column is constant
+        if df_check_column[(df_check_column['ds']>= two_months_ago)].groupby(col_name).ngroups < 2:
+            print(f'180没有更新:{col_name}')
+            return True
+        corresponding_date = df_check_column.iloc[-1]['ds']
+        return corresponding_date < two_months_ago
+    columns_to_drop = df.columns[df.columns.map(check_column)].tolist()
+    df = df.drop(columns = columns_to_drop)
 
 
     logger.info(f'删除两月不更新特征后数据量:{df.shape}')
 
 
     # drop rows where the prediction column is null
     df = df.dropna(subset=['y'])
     logger.info(f'删除预测列为空值的行后数据量:{df.shape}')
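Two review notes on the rewritten block above. The groupby loop in the warning branch rebinds df (for ds, df in warning_data_df.groupby(...)), so whenever at least one warning row exists, the outer DataFrame is clobbered for the rest of datachuli; giving the loop variable a different name would avoid that. Separately, the column filter now maps a closure over df.columns instead of looping; a toy sketch of that pattern:

    import pandas as pd

    df = pd.DataFrame({'ds': [1, 2], 'y': [3, 4], 'empty_feature': [None, None]})

    def check_column(col_name):
        # Keep the date and target columns; drop columns that are entirely NaN.
        if 'ds' in col_name or 'y' in col_name:
            return False
        return df[col_name].dropna().empty

    columns_to_drop = df.columns[df.columns.map(check_column)].tolist()
    print(columns_to_drop)  # ['empty_feature']
    df = df.drop(columns=columns_to_drop)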
|
19827
logs/pricepredict.log.1
19827
logs/pricepredict.log.1
File diff suppressed because one or more lines are too long
11584
logs/pricepredict.log.2
Normal file
11584
logs/pricepredict.log.2
Normal file
File diff suppressed because it is too large
Load Diff
12046
logs/pricepredict.log.3
Normal file
12046
logs/pricepredict.log.3
Normal file
File diff suppressed because it is too large
Load Diff
1
logs/pricepredict.log.4
Normal file
1
logs/pricepredict.log.4
Normal file
File diff suppressed because one or more lines are too long
10448
logs/pricepredict.log.5
10448
logs/pricepredict.log.5
File diff suppressed because it is too large
Load Diff
@@ -11,49 +11,10 @@ torch.set_float32_matmul_precision("high")
 sqlitedb = SQLiteHandler(db_name)
 sqlitedb.connect()
 
 
 def predict_main():
-    """
-    Main prediction routine: fetches data from ETA, preprocesses it, trains the models and predicts.
-
-    Args:
-        signature (BinanceAPI): Binance API instance.
-        etadata (EtaReader): ETA data reader instance.
-        is_eta (bool): whether to fetch data from ETA.
-        data_set (str): dataset name.
-        dataset (str): dataset path.
-        add_kdj (bool): whether to add the KDJ indicator.
-        is_timefurture (bool): whether to add time-derived features.
-        end_time (str): end time.
-        is_edbnamelist (bool): whether to use the EDB name list.
-        edbnamelist (list): EDB name list.
-        y (str): name of the prediction target column.
-        sqlitedb (SQLiteDB): SQLite database instance.
-        is_corr (bool): whether to run correlation analysis.
-        horizon (int): forecast horizon.
-        input_size (int): input size.
-        train_steps (int): training steps.
-        val_check_steps (int): validation check steps.
-        early_stop_patience_steps (int): early-stopping patience steps.
-        is_debug (bool): whether to run in debug mode.
-        dataset (str): dataset name.
-        is_train (bool): whether to train the models.
-        is_fivemodels (bool): whether to use the five models.
-        val_size (float): validation set size.
-        test_size (float): test set size.
-        settings (dict): model settings.
-        now (str): current time.
-        etadata (EtaReader): ETA data reader instance.
-        modelsindex (list): model index list.
-        data (str): data type.
-        is_eta (bool): whether to fetch data from ETA.
-
-    Returns:
-        None
-    """
     signature = BinanceAPI(APPID, SECRET)
     etadata = EtaReader(signature=signature,
-                        classifylisturl=classifylisturl,
+                        classifylisturl = classifylisturl,
                         classifyidlisturl=classifyidlisturl,
                         edbcodedataurl=edbcodedataurl,
                         edbcodelist=edbcodelist,
@@ -66,7 +27,7 @@ def predict_main():
         logger.info('从eta获取数据...')
         signature = BinanceAPI(APPID, SECRET)
         etadata = EtaReader(signature=signature,
-                            classifylisturl=classifylisturl,
+                            classifylisturl = classifylisturl,
                             classifyidlisturl=classifyidlisturl,
                             edbcodedataurl=edbcodedataurl,
                             edbcodelist=edbcodelist,
@@ -74,40 +35,39 @@ def predict_main():
                             edbdeleteurl=edbdeleteurl,
                             edbbusinessurl=edbbusinessurl,
                             )
-        df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set, dataset=dataset) # raw data, unprocessed
+        df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set,dataset=dataset) # raw data, unprocessed
 
 
         # preprocess the data
-        df = datachuli(df_zhibiaoshuju, df_zhibiaoliebiao, y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture,
-                       end_time=end_time)
+        df = datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,y = y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time)
 
     else:
         # read the data
-        logger.info('读取本地数据:' + os.path.join(dataset, data_set))
-        df = getdata(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
-                     is_timefurture=is_timefurture, end_time=end_time) # raw data, unprocessed
+        logger.info('读取本地数据:'+os.path.join(dataset,data_set))
+        df = getdata(filename=os.path.join(dataset,data_set),y=y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time) # raw data, unprocessed
 
     # rename the prediction column
-    df.rename(columns={y: 'y'}, inplace=True)
+    df.rename(columns={y:'y'},inplace=True)
 
     if is_edbnamelist:
         df = df[edbnamelist]
-    df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False)
+    df.to_csv(os.path.join(dataset,'指标数据.csv'), index=False)
     # save the latest date's y value to the database
     # take the first row and store it in the database
-    first_row = df[['ds', 'y']].tail(1)
+    first_row = df[['ds','y']].tail(1)
     # save the latest true value to the database
     if not sqlitedb.check_table_exists('trueandpredict'):
-        first_row.to_sql('trueandpredict', sqlitedb.connection, index=False)
+        first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
     else:
         for row in first_row.itertuples(index=False):
             row_dict = row._asdict()
             row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S')
-            check_query = sqlitedb.select_data('trueandpredict', where_condition=f"ds = '{row.ds}'")
+            check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
             if len(check_query) > 0:
                 set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
-                sqlitedb.update_data('trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'")
+                sqlitedb.update_data('trueandpredict',set_clause,where_condition = f"ds = '{row.ds}'")
                 continue
-            sqlitedb.insert_data('trueandpredict', tuple(row_dict.values()), columns=row_dict.keys())
+            sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=row_dict.keys())
 
     import datetime
     # check whether today is Monday
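Design note: the loop above emulates an upsert against trueandpredict (update when the ds key already exists, otherwise insert). With a unique constraint on ds, SQLite can do this in one statement; a minimal sketch, assuming such a constraint, which this diff does not establish:

    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.execute("CREATE TABLE trueandpredict (ds TEXT PRIMARY KEY, y REAL)")
    # ON CONFLICT needs a unique constraint on ds (assumed here via PRIMARY KEY).
    conn.execute(
        "INSERT INTO trueandpredict (ds, y) VALUES (?, ?) "
        "ON CONFLICT(ds) DO UPDATE SET y = excluded.y",
        ('2024-01-01 00:00:00', 75.3),
    )
    conn.commit()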
@@ -116,7 +76,7 @@ def predict_main():
         logger.info('今天是周一,更新预测模型')
         # find the model with the lowest prediction residual over the last 20 days
 
-        model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="20")
+        model_results = sqlitedb.select_data('trueandpredict',order_by = "ds DESC",limit = "20")
         model_results = model_results.dropna()
         modelnames = model_results.columns.to_list()[2:]
         for col in model_results[modelnames].select_dtypes(include=['object']).columns:
@@ -138,15 +98,15 @@ def predict_main():
         # save the result to the database
 
         if not sqlitedb.check_table_exists('most_model'):
-            sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT")
-        sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
+            sqlitedb.create_table('most_model',columns="ds datetime, most_common_model TEXT")
+        sqlitedb.insert_data('most_model',(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),most_common_model,),columns=('ds','most_common_model',))
 
     if is_corr:
         df = corr_feature(df=df)
 
     df1 = df.copy() # backup; ds and y columns are added back after feature selection
     logger.info(f"开始训练模型...")
-    row, col = df.shape
+    row,col = df.shape
 
     now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
     ex_Model(df,
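The computation of most_common_model sits between these hunks and is not shown in this compare view. One plausible shape, purely illustrative and not confirmed by the diff: per row, pick the model with the smallest absolute residual against y, then take the most frequent winner over the 20-day window:

    import pandas as pd

    # Toy stand-in for the last 20 rows of trueandpredict (elided from this diff).
    model_results = pd.DataFrame({
        'y':        [10.0, 11.0, 12.0],
        'NHITS':    [10.1, 10.5, 12.4],
        'Informer': [ 9.0, 11.2, 11.9],
    })
    modelnames = ['NHITS', 'Informer']
    residuals = model_results[modelnames].sub(model_results['y'], axis=0).abs()
    best_per_day = residuals.idxmin(axis=1)     # best model each day
    most_common_model = best_per_day.mode()[0]  # most frequent winner
    print(most_common_model)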
@@ -163,20 +123,22 @@ def predict_main():
              test_size=test_size,
              settings=settings,
              now=now,
-             etadata=etadata,
-             modelsindex=modelsindex,
-             data=data,
+             etadata = etadata,
+             modelsindex = modelsindex,
+             data = data,
              is_eta=is_eta,
              )
 
 
     logger.info('模型训练完成')
+    # # model evaluation
 
     logger.info('训练数据绘图ing')
     model_results3 = model_losss_juxiting(sqlitedb)
-    logger.info('训练数据绘图end')
 
+    logger.info('训练数据绘图end')
     # model report
 
     logger.info('制作报告ing')
     title = f'{settings}--{now}-预测报告' # report title
@@ -186,6 +148,8 @@ def predict_main():
     logger.info('制作报告end')
     logger.info('模型训练完成')
 
+    # tansuanli_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,end_time=end_time,reportname=reportname)
+
     # # LSTM univariate model
     # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
 
原油指标数据.xlsx (new binary file; not shown)
测试环境登录接口调试.ipynb (296 changed lines; diff suppressed: lines too long)