添加注释 (Add comments)

commit 11270ead85
parent d61259d3b9
@@ -436,6 +436,11 @@ def calculate_kdj(data, n=9):
 
 
 def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y',dataset='dataset',delweekenday=False,add_kdj=False,is_timefurture=False):
+    '''
+    原油特征数据处理函数,
+    接收的是两个df,一个是指标数据,一个是指标列表
+    输出的是一个df,包含ds,y,指标列
+    '''
     df = df_zhibiaoshuju.copy()
     if end_time == '':
         end_time = datetime.datetime.now().strftime('%Y-%m-%d')
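
The hunk header above references calculate_kdj(data, n=9), whose body is not part of this diff. For reference only, a minimal sketch of a conventional KDJ computation is shown below; the 'high'/'low'/'close' column names and the ewm smoothing constants are assumptions, not taken from this repository.

import pandas as pd

def calculate_kdj_sketch(data: pd.DataFrame, n: int = 9) -> pd.DataFrame:
    # Assumes the frame has 'high', 'low', 'close' columns; adjust to the real schema.
    df = data.copy()
    low_n = df['low'].rolling(window=n, min_periods=1).min()
    high_n = df['high'].rolling(window=n, min_periods=1).max()
    # Guard against a zero range so the division below does not blow up.
    range_n = (high_n - low_n).replace(0, float('nan'))
    # RSV: position of the close within the recent high/low range, in percent.
    rsv = (df['close'] - low_n) / range_n * 100
    # K and D are exponentially smoothed RSV (com=2 approximates the classic 1/3 weighting).
    df['K'] = rsv.ewm(com=2, adjust=False).mean()
    df['D'] = df['K'].ewm(com=2, adjust=False).mean()
    df['J'] = 3 * df['K'] - 2 * df['D']
    return df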
@@ -457,6 +462,11 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
     two_months_ago = current_date - timedelta(days=40)
 
     def check_column(col_name):
+        '''
+        判断两月不更新指标
+        输入:列名
+        输出:True or False
+        '''
         if 'ds' in col_name or 'y' in col_name:
             return False
         df_check_column = df[['ds',col_name]]
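
Only the first lines of check_column are visible here. A self-contained sketch of the stale-indicator test it appears to implement (flag a column whose last non-null observation is older than roughly 40 days) could look like the following; the helper name and the notna-based lookup are assumptions, not the project's exact code.

import datetime
from datetime import timedelta
import pandas as pd

def is_stale_column(df: pd.DataFrame, col_name: str, days: int = 40) -> bool:
    # Never drop the date or target columns.
    if 'ds' in col_name or 'y' in col_name:
        return False
    cutoff = datetime.datetime.now() - timedelta(days=days)
    # Dates on which this column actually has a value.
    non_null_dates = df.loc[df[col_name].notna(), 'ds']
    if non_null_dates.empty:
        return True  # no observations at all: treat as stale
    return non_null_dates.max() < cutoff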
@@ -469,7 +479,6 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
         return corresponding_date < two_months_ago
     columns_to_drop = df.columns[df.columns.map(check_column)].tolist()
     df = df.drop(columns = columns_to_drop)
 
     logger.info(f'删除两月不更新特征后数据量:{df.shape}')
-
     # 删除预测列空值的行
@@ -481,7 +490,7 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
     # 去掉指标列表中的columns_to_drop的行
     df_zhibiaoliebiao = df_zhibiaoliebiao[df_zhibiaoliebiao['指标名称'].isin(df.columns.tolist())]
     df_zhibiaoliebiao.to_csv(os.path.join(dataset,'特征处理后的指标名称及分类.csv'),index=False)
-    # 频度分析
+    # 数据频度分析
     featurePindu(dataset=dataset)
     # 向上填充
     df = df.ffill()
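
featurePindu itself is not shown in this diff. As an illustration only, one self-contained way to estimate each indicator's update frequency from the gaps between its non-null observations is sketched below; this is not the project's actual featurePindu.

import pandas as pd

def estimate_frequency(df: pd.DataFrame, datecol: str = 'ds') -> pd.Series:
    # For each indicator column, take the median gap (in days) between non-null observations.
    gaps = {}
    for col in df.columns:
        if col == datecol:
            continue
        dates = df.loc[df[col].notna(), datecol].sort_values()
        if len(dates) < 2:
            gaps[col] = None
            continue
        gaps[col] = dates.diff().dt.days.median()
    # Roughly: ~1 day => daily, ~7 => weekly, ~30 => monthly.
    return pd.Series(gaps, name='median_gap_days')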
@@ -491,26 +500,35 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
     # 删除周六日的数据
     if delweekenday:
         df = df[df['ds'].dt.weekday < 5]
+    # kdj指标
     if add_kdj:
         df = calculate_kdj(df)
+    # 衍生时间特征
     if is_timefurture:
         df = addtimecharacteristics(df=df,dataset=dataset)
+    # 特征分析
     featureAnalysis(df,dataset=dataset,y=y)
     return df
 
 def datachuli_juxiting(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y',dataset='dataset',delweekenday=False,add_kdj=False,is_timefurture=False):
+    '''
+    聚烯烃特征数据处理函数,
+    接收的是两个df,一个是指标数据,一个是指标列表
+    输出的是一个df,包含ds,y,指标列
+    '''
     df = df_zhibiaoshuju.copy()
     if end_time == '':
         end_time = datetime.datetime.now().strftime('%Y-%m-%d')
     # date转为pddate
     df.rename(columns={datecol:'ds'},inplace=True)
 
-
+    # 指定列统一减少数值
     df[offsite_col] = df[offsite_col]-offsite
-
+    # 预测列为avg_cols的均值
     df[y] = df[avg_cols].mean(axis=1)
-    print(df[['ds',y]+avg_cols].head(20))
+    # 去掉多余的列avg_cols
+    df = df.drop(columns=avg_cols)
 
     # 重命名预测列
     df.rename(columns={y:'y'},inplace=True)
     # 按时间顺序排列
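
offsite_col, offsite and avg_cols used above are module-level settings defined outside this diff. A minimal sketch of the target construction under hypothetical values follows; the column names and the offset below are placeholders, not the project's configuration.

import pandas as pd

# Hypothetical configuration; the real values live elsewhere in the project.
offsite_col = ['quote_a']              # columns shifted down by a constant
offsite = 100                          # constant subtracted from offsite_col
avg_cols = ['quote_a', 'quote_b']      # quote columns averaged into the target

def build_target(df: pd.DataFrame, y: str = 'y') -> pd.DataFrame:
    df = df.copy()
    # Uniformly reduce the selected columns by the offset.
    df[offsite_col] = df[offsite_col] - offsite
    # The prediction target is the row-wise mean of the quote columns.
    df[y] = df[avg_cols].mean(axis=1)
    # The individual quote columns are no longer needed once y is built.
    return df.drop(columns=avg_cols)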
@@ -521,10 +539,10 @@ def datachuli_juxiting(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time
     # 获取小于等于当前日期的数据
     df = df[df['ds'] <= end_time]
     logger.info(f'删除两月不更新特征前数据量:{df.shape}')
-    # 去掉近最后数据对应的日期在两月以前的列,删除近2月的数据是常熟的列
+    # 去掉近最后数据对应的日期在两月以前的列,删除近2月的数据是常数的列
     current_date = datetime.datetime.now()
     two_months_ago = current_date - timedelta(days=40)
-
+    # 检查两月不更新的特征
     def check_column(col_name):
         if 'ds' in col_name or 'y' in col_name:
             return False
@@ -441,8 +441,5 @@ class SQLiteHandler:
         else:
             print(f"Column '{column_name}' already exists in table '{table_name}'.")
 
-
-
-
 if __name__ == '__main__':
     print('This is a tool, not a script.')
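
For context, the print above sits in SQLiteHandler's add-column logic, which is not shown in this diff. A self-contained sketch of the usual add-column-if-missing pattern with the standard sqlite3 module follows; the function name and the TEXT default are assumptions.

import sqlite3

def add_column_if_not_exists(db_path: str, table_name: str,
                             column_name: str, column_type: str = 'TEXT') -> None:
    # Note: table and column names are interpolated directly, so they must come
    # from trusted configuration, not user input.
    with sqlite3.connect(db_path) as conn:
        cursor = conn.cursor()
        # PRAGMA table_info returns one row per existing column; the name is at index 1.
        cursor.execute(f"PRAGMA table_info({table_name})")
        existing = [row[1] for row in cursor.fetchall()]
        if column_name not in existing:
            cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}")
            conn.commit()
        else:
            print(f"Column '{column_name}' already exists in table '{table_name}'.")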
@@ -26,7 +26,6 @@ def predict_main():
     )
     # 获取数据
     if is_eta:
-        # eta数据
         logger.info('从eta获取数据...')
        signature = BinanceAPI(APPID, SECRET)
        etadata = EtaReader(signature=signature,
@@ -48,6 +47,7 @@ def predict_main():
         df = datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,y = y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time)
 
     else:
+        # 读取数据
         logger.info('读取本地数据:'+os.path.join(dataset,data_set))
         df = getdata(filename=os.path.join(dataset,data_set),y=y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time) # 原始数据,未处理
 
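
getdata in the else branch is defined elsewhere in the project. A minimal sketch of the local-data load it presumably performs before handing off to the shared feature processing is shown below; the 'date' column name and the plain read_csv call are assumptions about the cached file's layout.

import pandas as pd

def load_local_data(filename: str, datecol: str = 'date') -> pd.DataFrame:
    # Read the locally cached indicator export and parse its date column so the
    # frame can be fed to the same feature pipeline as the ETA branch.
    df = pd.read_csv(filename)
    df[datecol] = pd.to_datetime(df[datecol])
    return df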