Polyolefin eta data configuration

workpc 2025-02-13 11:25:03 +08:00
parent c428e9d993
commit ec1bab5429
3 changed files with 179 additions and 73 deletions

View File

@@ -1,55 +1,62 @@
-# Count feature frequencies
-# Read the file
-import pandas as pd
-df = pd.read_csv("D:\code\huarongqiming\碳酸锂合并数据.csv",encoding='gbk')
-df['ds'] = pd.to_datetime(df['ds'])
-# Sort by ds ascending and reset the index
-df = df.sort_values(by='ds', ascending=True).reset_index(drop=True)
-# Count feature frequencies
-# Randomly sample 6 values per column, compute 5 time gaps, and count the frequency of each gap
-columns = df.columns.to_list()
-columns.remove('ds')
-count_dict = {}
-for column in columns:
-    # Get the time gaps for this column
-    values = df[[column,'ds']]
-    values.dropna(inplace=True,axis=0)
-    values = values.reset_index(drop=True)
-    # Sample 10 values
-    value = values.sample(10)
-    index = value.index
-    next_index = index + 1
-    count = []
-    for i,j in zip(index, next_index):
-        # Compute the day difference between consecutive indices
-        try:
-            count.append((values.loc[j,'ds'] - values.loc[i,'ds']).days)
-        except:
-            pass
-    # Treat 31 as 30
-    count = [30 if i == 31 else i for i in count]
-    # Keep the most frequent value in count
-    count = max(set(count), key=count.count)
-    # Store it in the dict
-    count_dict[column] = count
-
-df = pd.DataFrame(count_dict,index=['count']).T
-pindu_dfs = pd.DataFrame()
-# Group by count
-# Output the feature-frequency statistics
-pindudict = {'1':'日度','7':'周度','30':'月度','90':'季度','180':'半年度','365':'年度'}
-for i in df.groupby('count'):
-    # Get the index values of i[1]
-    index = i[1].index
-    pindu_df = pd.DataFrame()
-    pindu_df[pindudict[str(i[0])]+f'({len(i[1])})'] = index
-    # Merge into pindu_dfs
-    pindu_dfs = pd.concat([pindu_dfs,pindu_df],axis=1)
-# Replace NaN with ''
-pindu_dfs = pindu_dfs.fillna('')
-pindu_dfs.to_csv('D:\code\huarongqiming\pindu.csv',index=False)
-print(pindu_dfs)
-print('*'*200)
+# Create eta custom indicator data
+from lib.dataread import *
+
+signature = BinanceAPI(APPID, SECRET)
+etadata = EtaReader(signature=signature,
+                    classifylisturl=classifylisturl,
+                    classifyidlisturl=classifyidlisturl,
+                    edbcodedataurl=edbcodedataurl,
+                    edbcodelist=edbcodelist,
+                    edbdatapushurl=edbdatapushurl,
+                    edbdeleteurl=edbdeleteurl,
+                    edbbusinessurl=edbbusinessurl
+                    )
+logger.info('从eta获取数据...')
+signature = BinanceAPI(APPID, SECRET)
+etadata = EtaReader(signature=signature,
+                    classifylisturl=classifylisturl,
+                    classifyidlisturl=classifyidlisturl,
+                    edbcodedataurl=edbcodedataurl,
+                    edbcodelist=edbcodelist,
+                    edbdatapushurl=edbdatapushurl,
+                    edbdeleteurl=edbdeleteurl,
+                    edbbusinessurl=edbbusinessurl,
+                    )
+
+# eta self-owned indicator codes
+modelsindex = {
+    'NHITS': 'SELF0000077',
+    'Informer':'SELF0000078',
+    'LSTM':'SELF0000079',
+    'iTransformer':'SELF0000080',
+    'TSMixer':'SELF0000081',
+    'TSMixerx':'SELF0000082',
+    'PatchTST':'SELF0000083',
+    'RNN':'SELF0000084',
+    'GRU':'SELF0000085',
+    'TCN':'SELF0000086',
+    'BiTCN':'SELF0000087',
+    'DilatedRNN':'SELF0000088',
+    'MLP':'SELF0000089',
+    'DLinear':'SELF0000090',
+    'NLinear':'SELF0000091',
+    'TFT':'SELF0000092',
+    'FEDformer':'SELF0000093',
+    'StemGNN':'SELF0000094',
+    'MLPMultivariate':'SELF0000095',
+    'TiDE':'SELF0000096',
+    'DeepNPTS':'SELF0000097'
+}
+
+date = '2025-02-13'
+value = 333444
+for m in modelsindex.keys():
+    list = []
+    list.append({'Date':date,'Value':value})
+    data['DataList'] = list
+    # data['IndexCode'] = modelsindex[m]
+    data['IndexName'] = f'聚烯烃价格预测{m}模型'
+    data['Remark'] = m
+    etadata.push_data(data)

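The loop above mutates a shared `data` template that arrives through the star import (presumably the request body defined in the config file below) and shadows the built-in `list`. As a minimal sketch under those assumptions, here is the same push with a fresh payload per model; `etadata`, `modelsindex`, and the field names come from the diff, while the date and value are placeholders:

```python
import datetime

# Placeholder forecast point; the real pipeline fills these from model output
push_date = datetime.date.today().strftime('%Y-%m-%d')
push_value = 333444

for model_name in modelsindex:
    payload = {
        'IndexCode': '',  # left blank as in the commit; modelsindex[model_name] holds the code
        'IndexName': f'聚烯烃价格预测{model_name}模型',
        'Remark': model_name,
        'DataList': [{'Date': push_date, 'Value': push_value}],
    }
    etadata.push_data(payload)  # the EtaReader upload call used in the diff
```

Building a new dict per iteration avoids stale fields leaking between requests, which the shared-template approach is prone to.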
View File

@@ -82,30 +82,31 @@ edbcodenamedict = {
 # eta self-owned indicator codes
 modelsindex = {
-    'NHITS': 'SELF0000001',
-    'Informer':'SELF0000057',
-    'LSTM':'SELF0000058',
-    'iTransformer':'SELF0000059',
-    'TSMixer':'SELF0000060',
-    'TSMixerx':'SELF0000061',
-    'PatchTST':'SELF0000062',
-    'RNN':'SELF0000063',
-    'GRU':'SELF0000064',
-    'TCN':'SELF0000065',
-    'BiTCN':'SELF0000066',
-    'DilatedRNN':'SELF0000067',
-    'MLP':'SELF0000068',
-    'DLinear':'SELF0000069',
-    'NLinear':'SELF0000070',
-    'TFT':'SELF0000071',
-    'FEDformer':'SELF0000072',
-    'StemGNN':'SELF0000073',
-    'MLPMultivariate':'SELF0000074',
-    'TiDE':'SELF0000075',
-    'DeepNPTS':'SELF0000076'
+    'NHITS': 'SELF0000077',
+    'Informer':'SELF0000078',
+    'LSTM':'SELF0000079',
+    'iTransformer':'SELF0000080',
+    'TSMixer':'SELF0000081',
+    'TSMixerx':'SELF0000082',
+    'PatchTST':'SELF0000083',
+    'RNN':'SELF0000084',
+    'GRU':'SELF0000085',
+    'TCN':'SELF0000086',
+    'BiTCN':'SELF0000087',
+    'DilatedRNN':'SELF0000088',
+    'MLP':'SELF0000089',
+    'DLinear':'SELF0000090',
+    'NLinear':'SELF0000091',
+    'TFT':'SELF0000092',
+    'FEDformer':'SELF0000093',
+    'StemGNN':'SELF0000094',
+    'MLPMultivariate':'SELF0000095',
+    'TiDE':'SELF0000096',
+    'DeepNPTS':'SELF0000097'
 }
 # Request body for pushing prediction results to eta; the model and DataList fields are updated before each request
 data = {
     "IndexCode": "",

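Only the first field of the `data` template is visible in this hunk; below is a plausible reconstruction, assuming exactly the fields the push loop in the first file sets (any further fields in the real template are not shown by this diff):

```python
# eta push request body; IndexName, Remark, and DataList are overwritten
# per model before each request, as the comment above describes.
data = {
    "IndexCode": "",   # the loop leaves this blank; the codes live in modelsindex
    "IndexName": "",   # becomes f'聚烯烃价格预测{m}模型'
    "Remark": "",      # becomes the model name
    "DataList": [],    # becomes [{'Date': ..., 'Value': ...}]
}
```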
View File

@@ -753,6 +753,85 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
     featureAnalysis(df,dataset=dataset,y=y)
     return df
+
+def zhoududatachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y',dataset='dataset',delweekenday=False,add_kdj=False,is_timefurture=False):
+    '''
+    Weekly data-processing function for crude-oil features.
+    Takes two DataFrames: the indicator data and the indicator list.
+    Returns one DataFrame containing ds, y, and the indicator columns.
+    '''
+    df = df_zhibiaoshuju.copy()
+    if end_time == '':
+        end_time = datetime.datetime.now().strftime('%Y-%m-%d')
+    # Rename the date column and the target column
+    df.rename(columns={datecol:'ds'},inplace=True)
+    df.rename(columns={y:'y'},inplace=True)
+    # Sort by date ascending
+    df.sort_values(by='ds',inplace=True)
+    df['ds'] = pd.to_datetime(df['ds'])
+    # Keep data from start_year through end_time
+    df = df[df['ds'].dt.year >= start_year]
+    df = df[df['ds'] <= end_time]
+    # last_update_times_df,y_last_update_time = create_feature_last_update_time(df)
+    # logger.info(f'删除预警的特征前数据量:{df.shape}')
+    # columns_to_drop = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time ]['feature'].values.tolist()
+    # df = df.drop(columns = columns_to_drop)
+    # logger.info(f'删除预警的特征后数据量:{df.shape}')
+    # if is_update_warning_data:
+    #     upload_warning_info(last_update_times_df,y_last_update_time)
+    # Drop columns whose most recent data is too old, and columns whose last two months of data are constant
+    if is_del_tow_month:
+        current_date = datetime.datetime.now()
+        two_months_ago = current_date - timedelta(days=180)
+        logger.info(f'删除两月不更新特征前数据量:{df.shape}')
+        columns_to_drop = []
+        for clo in df.columns:
+            if check_column(df,clo,two_months_ago):
+                columns_to_drop.append(clo)
+        df = df.drop(columns=columns_to_drop)
+        logger.info(f'删除两月不更新特征后数据量:{df.shape}')
+    if freq == 'W':
+        # Resample to weekly
+        df = df.resample('W', on='ds').mean().reset_index()
+    elif freq == 'M':
+        # Resample to monthly
+        df = df.resample('M', on='ds').mean().reset_index()
+    # Dropping rows with a missing target is disabled:
+    ''' working days are missing; dropping them would distort the forecasts and break the accuracy statistics '''
+    # df = df.dropna(subset=['y'])
+    logger.info(f'删除预测列为空值的行后数据量:{df.shape}')
+    df = df.dropna(axis=1, how='all')
+    logger.info(f'删除全为空值的列后数据量:{df.shape}')
+    df.to_csv(os.path.join(dataset,'未填充的特征数据.csv'),index=False)
+    # Keep only indicator-list rows whose 指标名称 still has a column in df
+    df_zhibiaoliebiao = df_zhibiaoliebiao[df_zhibiaoliebiao['指标名称'].isin(df.columns.tolist())]
+    df_zhibiaoliebiao.to_csv(os.path.join(dataset,'特征处理后的指标名称及分类.csv'),index=False)
+    # Data-frequency analysis
+    featurePindu(dataset=dataset)
+    # Forward fill
+    df = df.ffill()
+    # Backward fill
+    df = df.bfill()
+    # Drop weekend rows
+    if delweekenday:
+        df = df[df['ds'].dt.weekday < 5]
+    # KDJ indicator
+    if add_kdj:
+        df = calculate_kdj(df)
+    # Derived time features
+    if is_timefurture:
+        df = addtimecharacteristics(df=df,dataset=dataset)
+    # Feature analysis
+    featureAnalysis(df,dataset=dataset,y=y)
+    return df

 def datachuli_juxiting(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y',dataset='dataset',delweekenday=False,add_kdj=False,is_timefurture=False):
     '''
     Polyolefin feature data-processing function
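The pivotal step in `zhoududatachuli` is the frequency downsampling; below is a self-contained sketch with toy data showing what `resample('W', on='ds').mean()` produces, followed by the same ffill/bfill used above (column names mirror the function's ds/y convention):

```python
import pandas as pd

# Toy daily series with gaps, mirroring the ds/y layout used above
df = pd.DataFrame({
    'ds': pd.to_datetime(['2025-02-03', '2025-02-04', '2025-02-06',
                          '2025-02-10', '2025-02-12']),
    'y': [1.0, 2.0, 3.0, 4.0, 5.0],
})

# One row per week (weeks end on Sunday); the mean ignores missing days
weekly = df.resample('W', on='ds').mean().reset_index()

# Fill remaining gaps forward, then backward, as zhoududatachuli does
weekly = weekly.ffill().bfill()
print(weekly)
```

Because the weekly mean skips NaN days, sparse columns survive resampling; only weeks with no observations at all produce NaN rows, which the fill steps then patch.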
@@ -847,6 +926,25 @@ def getdata(filename, datecol='date',y='y',dataset='',add_kdj=False,is_timefurtu
     return df,df_zhibiaoliebiao
+
+def getzhoududata(filename, datecol='date',y='y',dataset='',add_kdj=False,is_timefurture=False,end_time=''):
+    logger.info('getdata接收'+filename+' '+datecol+' '+end_time)
+    # Check the file extension: csv or excel
+    if filename.endswith('.csv'):
+        df = loadcsv(filename)
+    else:
+        # Read the indicator data from excel
+        df_zhibiaoshuju = pd.read_excel(filename,sheet_name='指标数据')
+        df_zhibiaoliebiao = pd.read_excel(filename,sheet_name='指标列表')
+    # Convert the date strings to datetime
+    df = zhoududatachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol,y = y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time)
+    return df,df_zhibiaoliebiao

 def getdata_juxiting(filename, datecol='date',y='y',dataset='',add_kdj=False,is_timefurture=False,end_time=''):
     logger.info('getdata接收'+filename+' '+datecol+' '+end_time)
     # Check the file extension: csv or excel
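For orientation, a hypothetical call of the new `getzhoududata`; the workbook name and output directory are placeholders, and the workbook must contain the 指标数据 and 指标列表 sheets read above:

```python
# Hypothetical invocation; 'weekly_data.xlsx' and 'dataset' are placeholders
df, df_zhibiaoliebiao = getzhoududata(
    'weekly_data.xlsx',   # Excel workbook with sheets 指标数据 and 指标列表
    datecol='date',
    y='y',
    dataset='dataset',    # directory for the intermediate CSV outputs
    add_kdj=False,
    is_timefurture=True,  # derive calendar features via addtimecharacteristics
    end_time='2025-02-13',
)
```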