Compare commits
No commits in common. "f40d928caa259d0a8e5621b8865b9a50b8ff3e58" and "2a044b3ae12f271f40de5eaa77cdfd8be7ab6423" have entirely different histories.
f40d928caa
...
2a044b3ae1
111
aa copy.py
111
aa copy.py
@ -1,62 +1,55 @@
|
|||||||
# 创建eta自定义指标数据
|
# 统计特征频度
|
||||||
|
|
||||||
from lib.dataread import *
|
# 读取文件
|
||||||
|
import pandas as pd
|
||||||
|
df = pd.read_csv("D:\code\huarongqiming\碳酸锂合并数据.csv",encoding='gbk')
|
||||||
|
df['ds'] = pd.to_datetime(df['ds'])
|
||||||
|
# 按ds正序排序,重置索引
|
||||||
|
df = df.sort_values(by='ds', ascending=True).reset_index(drop=True)
|
||||||
|
|
||||||
signature = BinanceAPI(APPID, SECRET)
|
# 统计特征频度
|
||||||
etadata = EtaReader(signature=signature,
|
# 每列随机抽取6个值,计算出5个时间间隔,统计每个时间间隔的频度
|
||||||
classifylisturl=classifylisturl,
|
columns = df.columns.to_list()
|
||||||
classifyidlisturl=classifyidlisturl,
|
columns.remove('ds')
|
||||||
edbcodedataurl=edbcodedataurl,
|
count_dict = {}
|
||||||
edbcodelist=edbcodelist,
|
for column in columns:
|
||||||
edbdatapushurl=edbdatapushurl,
|
# 获取每列时间间隔
|
||||||
edbdeleteurl=edbdeleteurl,
|
values = df[[column,'ds']]
|
||||||
edbbusinessurl=edbbusinessurl
|
values.dropna(inplace=True,axis=0)
|
||||||
)
|
values=values.reset_index(drop=True)
|
||||||
|
|
||||||
logger.info('从eta获取数据...')
|
# 抽取10个值
|
||||||
signature = BinanceAPI(APPID, SECRET)
|
value = values.sample(10)
|
||||||
etadata = EtaReader(signature=signature,
|
index = value.index
|
||||||
classifylisturl=classifylisturl,
|
next_index = index + 1
|
||||||
classifyidlisturl=classifyidlisturl,
|
count = []
|
||||||
edbcodedataurl=edbcodedataurl,
|
for i,j in zip(index, next_index):
|
||||||
edbcodelist=edbcodelist,
|
#通过索引计算日期差
|
||||||
edbdatapushurl=edbdatapushurl,
|
try:
|
||||||
edbdeleteurl=edbdeleteurl,
|
count.append((values.loc[j,'ds'] - values.loc[i,'ds']).days)
|
||||||
edbbusinessurl=edbbusinessurl,
|
except:
|
||||||
)
|
pass
|
||||||
|
# 把31 换成 30
|
||||||
# eta自有数据指标编码
|
count = [30 if i == 31 else i for i in count]
|
||||||
modelsindex = {
|
# 保留count中出现次数最多的数
|
||||||
'NHITS': 'SELF0000077',
|
count = max(set(count), key=count.count)
|
||||||
'Informer':'SELF0000078',
|
# 存储到字典中
|
||||||
'LSTM':'SELF0000079',
|
count_dict[column] = count
|
||||||
'iTransformer':'SELF0000080',
|
|
||||||
'TSMixer':'SELF0000081',
|
df = pd.DataFrame(count_dict,index=['count']).T
|
||||||
'TSMixerx':'SELF0000082',
|
pindu_dfs = pd.DataFrame()
|
||||||
'PatchTST':'SELF0000083',
|
# 根据count分组
|
||||||
'RNN':'SELF0000084',
|
# 输出特征频度统计
|
||||||
'GRU':'SELF0000085',
|
pindudict = {'1':'日度','7':'周度','30':'月度','90':'季度','180':'半年度','365':'年度'}
|
||||||
'TCN':'SELF0000086',
|
for i in df.groupby('count'):
|
||||||
'BiTCN':'SELF0000087',
|
# 获取 i[1] 的索引值
|
||||||
'DilatedRNN':'SELF0000088',
|
index = i[1].index
|
||||||
'MLP':'SELF0000089',
|
pindu_df = pd.DataFrame()
|
||||||
'DLinear':'SELF0000090',
|
pindu_df[pindudict[str(i[0])]+f'({len(i[1])})'] = index
|
||||||
'NLinear':'SELF0000091',
|
# 合并到pindu_dfs
|
||||||
'TFT':'SELF0000092',
|
pindu_dfs = pd.concat([pindu_dfs,pindu_df],axis=1)
|
||||||
'FEDformer':'SELF0000093',
|
# nan替换为 ' '
|
||||||
'StemGNN':'SELF0000094',
|
pindu_dfs = pindu_dfs.fillna('')
|
||||||
'MLPMultivariate':'SELF0000095',
|
pindu_dfs.to_csv('D:\code\huarongqiming\pindu.csv',index=False)
|
||||||
'TiDE':'SELF0000096',
|
print(pindu_dfs)
|
||||||
'DeepNPTS':'SELF0000097'
|
print('*'*200)
|
||||||
}
|
|
||||||
|
|
||||||
date = '2025-02-13'
|
|
||||||
value = 333444
|
|
||||||
for m in modelsindex.keys():
|
|
||||||
list = []
|
|
||||||
list.append({'Date':date,'Value':value})
|
|
||||||
data['DataList'] = list
|
|
||||||
# data['IndexCode'] = modelsindex[m]
|
|
||||||
data['IndexName'] = f'聚烯烃价格预测{m}模型'
|
|
||||||
data['Remark'] = m
|
|
||||||
etadata.push_data(data)
|
|
@ -82,31 +82,30 @@ edbcodenamedict = {
|
|||||||
|
|
||||||
# eta自有数据指标编码
|
# eta自有数据指标编码
|
||||||
modelsindex = {
|
modelsindex = {
|
||||||
'NHITS': 'SELF0000077',
|
'NHITS': 'SELF0000001',
|
||||||
'Informer':'SELF0000078',
|
'Informer':'SELF0000057',
|
||||||
'LSTM':'SELF0000079',
|
'LSTM':'SELF0000058',
|
||||||
'iTransformer':'SELF0000080',
|
'iTransformer':'SELF0000059',
|
||||||
'TSMixer':'SELF0000081',
|
'TSMixer':'SELF0000060',
|
||||||
'TSMixerx':'SELF0000082',
|
'TSMixerx':'SELF0000061',
|
||||||
'PatchTST':'SELF0000083',
|
'PatchTST':'SELF0000062',
|
||||||
'RNN':'SELF0000084',
|
'RNN':'SELF0000063',
|
||||||
'GRU':'SELF0000085',
|
'GRU':'SELF0000064',
|
||||||
'TCN':'SELF0000086',
|
'TCN':'SELF0000065',
|
||||||
'BiTCN':'SELF0000087',
|
'BiTCN':'SELF0000066',
|
||||||
'DilatedRNN':'SELF0000088',
|
'DilatedRNN':'SELF0000067',
|
||||||
'MLP':'SELF0000089',
|
'MLP':'SELF0000068',
|
||||||
'DLinear':'SELF0000090',
|
'DLinear':'SELF0000069',
|
||||||
'NLinear':'SELF0000091',
|
'NLinear':'SELF0000070',
|
||||||
'TFT':'SELF0000092',
|
'TFT':'SELF0000071',
|
||||||
'FEDformer':'SELF0000093',
|
'FEDformer':'SELF0000072',
|
||||||
'StemGNN':'SELF0000094',
|
'StemGNN':'SELF0000073',
|
||||||
'MLPMultivariate':'SELF0000095',
|
'MLPMultivariate':'SELF0000074',
|
||||||
'TiDE':'SELF0000096',
|
'TiDE':'SELF0000075',
|
||||||
'DeepNPTS':'SELF0000097'
|
'DeepNPTS':'SELF0000076'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# eta 上传预测结果的请求体,后面发起请求的时候更改 model datalist 数据
|
# eta 上传预测结果的请求体,后面发起请求的时候更改 model datalist 数据
|
||||||
data = {
|
data = {
|
||||||
"IndexCode": "",
|
"IndexCode": "",
|
||||||
|
@ -753,85 +753,6 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
|||||||
featureAnalysis(df,dataset=dataset,y=y)
|
featureAnalysis(df,dataset=dataset,y=y)
|
||||||
return df
|
return df
|
||||||
|
|
||||||
def zhoududatachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y',dataset='dataset',delweekenday=False,add_kdj=False,is_timefurture=False):
|
|
||||||
'''
|
|
||||||
原油特征周度数据处理函数,
|
|
||||||
接收的是两个df,一个是指标数据,一个是指标列表
|
|
||||||
输出的是一个df,包含ds,y,指标列
|
|
||||||
'''
|
|
||||||
df = df_zhibiaoshuju.copy()
|
|
||||||
|
|
||||||
if end_time == '':
|
|
||||||
end_time = datetime.datetime.now().strftime('%Y-%m-%d')
|
|
||||||
# 重命名时间列,预测列
|
|
||||||
df.rename(columns={datecol:'ds'},inplace=True)
|
|
||||||
df.rename(columns={y:'y'},inplace=True)
|
|
||||||
# 按时间顺序排列
|
|
||||||
df.sort_values(by='ds',inplace=True)
|
|
||||||
df['ds'] = pd.to_datetime(df['ds'])
|
|
||||||
# 获取start_year年到end_time的数据
|
|
||||||
df = df[df['ds'].dt.year >= start_year]
|
|
||||||
df = df[df['ds'] <= end_time]
|
|
||||||
# last_update_times_df,y_last_update_time = create_feature_last_update_time(df)
|
|
||||||
# logger.info(f'删除预警的特征前数据量:{df.shape}')
|
|
||||||
# columns_to_drop = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time ]['feature'].values.tolist()
|
|
||||||
# df = df.drop(columns = columns_to_drop)
|
|
||||||
# logger.info(f'删除预警的特征后数据量:{df.shape}')
|
|
||||||
# if is_update_warning_data:
|
|
||||||
# upload_warning_info(last_update_times_df,y_last_update_time)
|
|
||||||
# 去掉近最后数据对应的日期在六月以前的列,删除近2月的数据是常熟的列
|
|
||||||
if is_del_tow_month:
|
|
||||||
current_date = datetime.datetime.now()
|
|
||||||
two_months_ago = current_date - timedelta(days=180)
|
|
||||||
logger.info(f'删除两月不更新特征前数据量:{df.shape}')
|
|
||||||
columns_to_drop = []
|
|
||||||
for clo in df.columns:
|
|
||||||
if check_column(df,clo,two_months_ago):
|
|
||||||
columns_to_drop.append(clo)
|
|
||||||
df = df.drop(columns=columns_to_drop)
|
|
||||||
|
|
||||||
logger.info(f'删除两月不更新特征后数据量:{df.shape}')
|
|
||||||
|
|
||||||
if freq == 'W':
|
|
||||||
# 按周取样
|
|
||||||
df = df.resample('W', on='ds').mean().reset_index()
|
|
||||||
elif freq == 'M':
|
|
||||||
# 按月取样
|
|
||||||
df = df.resample('M', on='ds').mean().reset_index()
|
|
||||||
# 删除预测列空值的行
|
|
||||||
''' 工作日缺失,如果删除,会影响预测结果,导致统计准确率出错 '''
|
|
||||||
# df = df.dropna(subset=['y'])
|
|
||||||
logger.info(f'删除预测列为空值的行后数据量:{df.shape}')
|
|
||||||
df = df.dropna(axis=1, how='all')
|
|
||||||
logger.info(f'删除全为空值的列后数据量:{df.shape}')
|
|
||||||
df.to_csv(os.path.join(dataset,'未填充的特征数据.csv'),index=False)
|
|
||||||
# 去掉指标列表中的columns_to_drop的行
|
|
||||||
df_zhibiaoliebiao = df_zhibiaoliebiao[df_zhibiaoliebiao['指标名称'].isin(df.columns.tolist())]
|
|
||||||
df_zhibiaoliebiao.to_csv(os.path.join(dataset,'特征处理后的指标名称及分类.csv'),index=False)
|
|
||||||
# 数据频度分析
|
|
||||||
featurePindu(dataset=dataset)
|
|
||||||
# 向上填充
|
|
||||||
df = df.ffill()
|
|
||||||
# 向下填充
|
|
||||||
df = df.bfill()
|
|
||||||
|
|
||||||
# 删除周六日的数据
|
|
||||||
if delweekenday:
|
|
||||||
df = df[df['ds'].dt.weekday < 5]
|
|
||||||
|
|
||||||
# kdj指标
|
|
||||||
if add_kdj:
|
|
||||||
df = calculate_kdj(df)
|
|
||||||
# 衍生时间特征
|
|
||||||
if is_timefurture:
|
|
||||||
df = addtimecharacteristics(df=df,dataset=dataset)
|
|
||||||
# 特征分析
|
|
||||||
featureAnalysis(df,dataset=dataset,y=y)
|
|
||||||
return df
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def datachuli_juxiting(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y',dataset='dataset',delweekenday=False,add_kdj=False,is_timefurture=False):
|
def datachuli_juxiting(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y',dataset='dataset',delweekenday=False,add_kdj=False,is_timefurture=False):
|
||||||
'''
|
'''
|
||||||
聚烯烃特征数据处理函数,
|
聚烯烃特征数据处理函数,
|
||||||
@ -926,25 +847,6 @@ def getdata(filename, datecol='date',y='y',dataset='',add_kdj=False,is_timefurtu
|
|||||||
|
|
||||||
return df,df_zhibiaoliebiao
|
return df,df_zhibiaoliebiao
|
||||||
|
|
||||||
def getzhoududata(filename, datecol='date',y='y',dataset='',add_kdj=False,is_timefurture=False,end_time=''):
|
|
||||||
logger.info('getdata接收:'+filename+' '+datecol+' '+end_time)
|
|
||||||
# 判断后缀名 csv或excel
|
|
||||||
if filename.endswith('.csv'):
|
|
||||||
df = loadcsv(filename)
|
|
||||||
else:
|
|
||||||
# 读取excel 指标数据
|
|
||||||
df_zhibiaoshuju = pd.read_excel(filename,sheet_name='指标数据')
|
|
||||||
df_zhibiaoliebiao = pd.read_excel(filename,sheet_name='指标列表')
|
|
||||||
|
|
||||||
|
|
||||||
# 日期字符串转为datatime
|
|
||||||
df = zhoududatachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol,y = y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time)
|
|
||||||
|
|
||||||
return df,df_zhibiaoliebiao
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def getdata_juxiting(filename, datecol='date',y='y',dataset='',add_kdj=False,is_timefurture=False,end_time=''):
|
def getdata_juxiting(filename, datecol='date',y='y',dataset='',add_kdj=False,is_timefurture=False,end_time=''):
|
||||||
logger.info('getdata接收:'+filename+' '+datecol+' '+end_time)
|
logger.info('getdata接收:'+filename+' '+datecol+' '+end_time)
|
||||||
# 判断后缀名 csv或excel
|
# 判断后缀名 csv或excel
|
||||||
|
Loading…
Reference in New Issue
Block a user