Python files
This commit is contained in:
parent
296db87abc
commit
c76cebdec6
55
aa copy.py
Normal file
@@ -0,0 +1,55 @@
# Count how often each feature updates (feature frequency)

# Read the file
import pandas as pd

df = pd.read_csv(r"D:\code\huarongqiming\碳酸锂合并数据.csv", encoding='gbk')
df['ds'] = pd.to_datetime(df['ds'])
# Sort by ds ascending and reset the index
df = df.sort_values(by='ds', ascending=True).reset_index(drop=True)

# Count feature frequency:
# randomly sample rows from each column, compute the day gap between each
# sampled row and the row after it, then keep the most frequent gap
columns = df.columns.to_list()
columns.remove('ds')
count_dict = {}
for column in columns:
    # Collect this column's timestamps
    values = df[[column, 'ds']]
    values = values.dropna(axis=0).reset_index(drop=True)

    # Sample 10 rows
    value = values.sample(10)
    index = value.index
    next_index = index + 1
    count = []
    for i, j in zip(index, next_index):
        # Day difference between a sampled row and its successor
        try:
            count.append((values.loc[j, 'ds'] - values.loc[i, 'ds']).days)
        except KeyError:
            pass  # the last row has no successor
    # Treat 31-day gaps as 30-day gaps (both mean monthly)
    count = [30 if i == 31 else i for i in count]
    # Keep the most frequent gap
    count = max(set(count), key=count.count)
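    # e.g. count = [30, 30, 7, 30] -> the most frequent gap is 30 (monthly)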
    # Store it in the dict
    count_dict[column] = count

df = pd.DataFrame(count_dict, index=['count']).T
pindu_dfs = pd.DataFrame()
# Group by gap and print the feature-frequency summary
# gap in days -> frequency label (daily, weekly, monthly, quarterly, semiannual, annual)
pindudict = {'1': '日度', '7': '周度', '30': '月度', '90': '季度', '180': '半年度', '365': '年度'}
for i in df.groupby('count'):
    # Names of the features that share this gap (the index of i[1])
    index = i[1].index
    pindu_df = pd.DataFrame()
    pindu_df[pindudict[str(i[0])] + f'({len(i[1])})'] = index
    # Merge into pindu_dfs
    pindu_dfs = pd.concat([pindu_dfs, pindu_df], axis=1)
# Replace NaN with ''
pindu_dfs = pindu_dfs.fillna('')
pindu_dfs.to_csv(r'D:\code\huarongqiming\pindu.csv', index=False)
print(pindu_dfs)
print('*' * 200)
10
aa.py
Normal file
@@ -0,0 +1,10 @@
# Run a cmd command on a schedule
import os
import time

while True:
    print(time.strftime('%H:%M'))
    # Only run on weekdays at 07:00 (the loop wakes once a minute, so the
    # 07:00 window fires at most once per day)
    if time.strftime('%A') not in ['Saturday', 'Sunday'] and time.strftime('%H:%M') in ['07:00']:
        os.system('D:/ProgramData/anaconda3/python.exe main.py')
    time.sleep(60)
254
config_jingbo.py
Normal file
@@ -0,0 +1,254 @@
import logging
import os
import logging.handlers
import datetime


# eta API token
APPID = "XNLDvxZHHugj7wJ7"
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"

# eta API URLs
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
edbdatapushurl = 'http://10.189.2.78:8108/v1/edb/push'
edbdeleteurl = 'http://10.189.2.78:8108/v1/edb/business/edb/del'
edbbusinessurl = 'http://10.189.2.78:8108/v1/edb/business/data/del'
edbcodelist = ['CO1 Comdty', 'ovx index', 'C2404194834', 'C2404199738', 'dxy curncy', 'C2403128043', 'C2403150124',
               'DOESCRUD Index', 'WTRBM1 EEGC Index', 'FVHCM1 INDEX', 'doedtprd index', 'CFFDQMMN INDEX',
               'C2403083739', 'C2404167878', 'C2403250571', 'lmcads03 lme comdty', 'GC1 COMB Comdty',
               'C2404171822', 'C2404167855']

# Hard-coded column list for now, matching edbcodelist above; to be revised later
edbnamelist = [
    'ds', 'y',
    'Brent c1-c6', 'Brent c1-c3', 'Brent-WTI', '美国商业原油库存',
    'DFL', '美国汽油裂解价差', 'ovx index', 'dxy curncy', 'lmcads03 lme comdty',
    'C2403128043', 'C2403150124', 'FVHCM1 INDEX', 'doedtprd index', 'CFFDQMMN INDEX',
    'C2403083739', 'C2404167878',
    'GC1 COMB Comdty', 'C2404167855'
]


# eta indicator codes owned by this project's models
modelsindex = {
    'NHITS': 'SELF0000001',
    'Informer': 'SELF0000057',
    'LSTM': 'SELF0000058',
    'iTransformer': 'SELF0000059',
    'TSMixer': 'SELF0000060',
    'TSMixerx': 'SELF0000061',
    'PatchTST': 'SELF0000062',
    'RNN': 'SELF0000063',
    'GRU': 'SELF0000064',
    'TCN': 'SELF0000065',
    'BiTCN': 'SELF0000066',
    'DilatedRNN': 'SELF0000067',
    'MLP': 'SELF0000068',
    'DLinear': 'SELF0000069',
    'NLinear': 'SELF0000070',
    'TFT': 'SELF0000071',
    'FEDformer': 'SELF0000072',
    'StemGNN': 'SELF0000073',
    'MLPMultivariate': 'SELF0000074',
    'TiDE': 'SELF0000075',
    'DeepNPTS': 'SELF0000076'
}

# Request body for pushing prediction results to eta; the IndexCode and
# DataList fields are filled in per model when the request is made
data = {
    "IndexCode": "",
    "IndexName": "价格预测模型",
    "Unit": "无",
    "Frequency": "日度",
    "SourceName": "价格预测",
    "Remark": 'ddd',
    "DataList": [
        {
            "Date": "2024-05-02",
            "Value": 333444
        }
    ]
}
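
# Illustrative sketch (not part of the original config): how the body above
# might be filled in and POSTed to edbdatapushurl. The real request flow lives
# in lib/dataread.py, so the bare POST below (no auth headers) is an assumption.
# import requests
# push_body = dict(data, IndexCode=modelsindex['NHITS'],
#                  DataList=[{"Date": "2024-05-02", "Value": 78.5}])
# requests.post(edbdatapushurl, json=push_body)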

# eta classification
# Data can only be fetched at level 3, so every level-3 class under
# 能源化工 (energy & chemicals) has to be collected by hand
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
# ParentId: 1160 is 能源化工 (energy & chemicals)
# ClassifyId: 1214 is 原油 (crude oil)
# ParentId 1214 returns all data under crude oil.
ClassifyId = 1214


### Report upload configuration
# Variables -- production environment
# login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login"
# upload_url = "http://10.200.32.39/jingbo-api/api/analysis/reportInfo/researchUploadReportSave"

# login_data = {
#     "data": {
#         "account": "api_dev",
#         "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=",
#         "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
#         "terminal": "API"
#     },
#     "funcModule": "API",
#     "funcOperation": "获取token"
# }

# upload_data = {
#     "funcModule": '研究报告信息',
#     "funcOperation": '上传原油价格预测报告',
#     "data": {
#         "ownerAccount": '27663',  # account that owns the report: 27663 - 刘小朋
#         "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed to OIL_PRICE_FORECAST
#         "fileName": '',  # file name
#         "fileBase64": '',  # file content, base64-encoded
#         "categoryNo": 'yyjgycbg',  # research-report category code
#         "smartBusinessClassCode": 'YCJGYCBG',  # analysis-report category code
#         "reportEmployeeCode": "E40482",  # reporter: E40482 - administrator, 0000027663 - 刘小朋
#         "reportDeptCode": "002000621000",  # reporting department: 002000621000, SH futures research
#         "productGroupCode": "RAW_MATERIAL"  # commodity group
#     }
# }


# Variables -- test environment
login_pushreport_url = "http://192.168.100.53:8080/jingbo-dev/api/server/login"
upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
# upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave"  # zhaoqiwei

login_data = {
    "data": {
        "account": "api_test",
        "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=",
        "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
        "terminal": "API"
    },
    "funcModule": "API",
    "funcOperation": "获取token"
}

upload_data = {
    "funcModule": '研究报告信息',
    "funcOperation": '上传原油价格预测报告',
    "data": {
        "ownerAccount": 'arui',  # account that owns the report
        "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed to OIL_PRICE_FORECAST
        "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf',  # file name
        "fileBase64": '',  # file content, base64-encoded
        "categoryNo": 'yyjgycbg',  # research-report category code
        "smartBusinessClassCode": 'YCJGYCBG',  # analysis-report category code
        "reportEmployeeCode": "E40116",  # reporter
        "reportDeptCode": "D0044",  # reporting department
        "productGroupCode": "RAW_MATERIAL"  # commodity group
    }
}


### Production switches
# is_train = True              # whether to train
# is_debug = False             # whether to run in debug mode
# is_eta = True                # whether to use the eta API
# is_timefurture = True        # whether to add time features
# is_fivemodels = False        # whether to reuse the five best saved models
# is_edbcode = False           # use the features from edbcodelist
# is_edbnamelist = False       # custom features, matching edbnamelist above
# is_update_eta = True         # push prediction results to eta
# is_update_report = True      # whether to upload the report

### Switches
is_train = True              # whether to train
is_debug = False             # whether to run in debug mode
is_eta = True                # whether to use the eta API
is_timefurture = True        # whether to add time features
is_fivemodels = False        # whether to reuse the five best saved models
is_edbcode = False           # use the features from edbcodelist
is_edbnamelist = False       # custom features, matching edbnamelist above
is_update_eta = False        # push prediction results to eta
is_update_report = False     # whether to upload the report


# Data cutoff date
end_time = ''                # data cutoff date
delweekenday = True
is_corr = False              # whether features go through lead/lag correlation boosting
add_kdj = False              # whether to add the KDJ indicator
if add_kdj and is_edbnamelist:
    edbnamelist = edbnamelist + ['K', 'D', 'J']

### Model parameters
y = 'Brent活跃合约'  # target variable of the crude-oil indicator data
# y = '期货结算价(连续):布伦特原油:前一个观测值'  # target variable for ineoil
horizon = 5          # forecast horizon
input_size = 40      # input sequence length
train_steps = 50 if is_debug else 1000  # training steps, used to cap the epoch count
val_check_steps = 30           # evaluation frequency
early_stop_patience_steps = 5  # early-stopping patience

# --- cross-validation parameters
test_size = 200       # test-set size; defined here, reassigned later where used
val_size = test_size  # validation-set size, same as the test set

### Feature-selection parameters
k = 100  # number of features to keep; 0 or more than the feature count means all features


### Files
data_set = '原油指标数据.xlsx'  # dataset file
# data_set = 'INE_OIL(1).csv'
### Folders
dataset = 'dataset'  # dataset folder

# Database name
db_name = os.path.join(dataset, 'jbsh_yuanyou.db')

settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'
# Current date and time
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
reportname = f'Brent原油大模型预测--{now}.pdf'  # report file name
reportname = reportname.replace(':', '-')  # replace colons

### Email configuration
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
# recv = ['liurui_test@163.com', '52585119@qq.com']
recv = ['liurui_test@163.com']
title = reportname  # the original quoted the variable name ('reportname'), which sent a literal string
content = 'brent价格预测报告请看附件'
file = os.path.join(dataset, reportname)  # same quoting bug fixed here
# file = os.path.join(dataset, '14-7-50--100-原油指标数据.xlsx-Brent连1合约价格--20240731175936-预测报告.pdf')
ssl = True


### Logging configuration

# Create the log directory if it does not exist
log_dir = 'logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# Configure the logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.INFO)

# File handler: write logs to a rotating file
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

# Console handler: print logs to the console
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(message)s'))

# Attach the handlers to the logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# logger.info('当前配置:' + settings)
297
config_juxiting.py
Normal file
@@ -0,0 +1,297 @@
import logging
import os
import logging.handlers
import datetime


# eta API token
APPID = "XNLDvxZHHugj7wJ7"
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"

# eta API URLs
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
edbdatapushurl = 'http://10.189.2.78:8108/v1/edb/push'
edbdeleteurl = 'http://10.189.2.78:8108/v1/edb/business/edb/del'
edbbusinessurl = 'http://10.189.2.78:8108/v1/edb/business/data/del'
edbcodelist = ['ID01385938', 'lmcads03 lme comdty',
               'GC1 COMB Comdty',
               'C2404171822',
               'dxy curncy',
               'S5443199 ',
               'S5479800',
               'S5443108',
               'H7358586',
               'LC3FM1 INDEX',
               'CNY REGN Curncy',
               's0105897',
               'M0067419',
               'M0066351',
               'S0266372',
               'S0266438',
               'S0266506']

# Hard-coded column list for now, matching edbcodelist above; to be revised later
edbnamelist = [
    'ds', 'y',
    'LME铜价',
    '黄金连1合约',
    'Brent-WTI',
    '美元指数',
    '甲醇鲁南价格',
    '甲醇太仓港口价格',
    '山东丙烯主流价',
    '丙烷(山东)',
    'FEI丙烷 M1',
    '在岸人民币汇率',
    '南华工业品指数',
    'PVC期货主力',
    'PE期货收盘价',
    'PP连续-1月',
    'PP连续-5月',
    'PP连续-9月',
]

edbcodenamedict = {
    'ID01385938': 'PP:拉丝:1102K:市场价:青州:国家能源宁煤(日)',
    'lmcads03 lme comdty': 'LME铜价',
    'GC1 COMB Comdty': '黄金连1合约',
    'C2404171822': 'Brent-WTI',
    'dxy curncy': '美元指数',
    'S5443199 ': '甲醇鲁南价格',
    'S5479800': '甲醇太仓港口价格',
    'S5443108': '山东丙烯主流价',
    'H7358586': '丙烷(山东)',
    'LC3FM1 INDEX': 'FEI丙烷 M1',
    'CNY REGN Curncy': '在岸人民币汇率',
    's0105897': '南华工业品指数',
    'M0067419': 'PVC期货主力',
    'M0066351': 'PE期货收盘价',
    'S0266372': 'PP连续-1月',
    'S0266438': 'PP连续-5月',
    'S0266506': 'PP连续-9月',
}

# eta indicator codes owned by this project's models
modelsindex = {
    'NHITS': 'SELF0000001',
    'Informer': 'SELF0000057',
    'LSTM': 'SELF0000058',
    'iTransformer': 'SELF0000059',
    'TSMixer': 'SELF0000060',
    'TSMixerx': 'SELF0000061',
    'PatchTST': 'SELF0000062',
    'RNN': 'SELF0000063',
    'GRU': 'SELF0000064',
    'TCN': 'SELF0000065',
    'BiTCN': 'SELF0000066',
    'DilatedRNN': 'SELF0000067',
    'MLP': 'SELF0000068',
    'DLinear': 'SELF0000069',
    'NLinear': 'SELF0000070',
    'TFT': 'SELF0000071',
    'FEDformer': 'SELF0000072',
    'StemGNN': 'SELF0000073',
    'MLPMultivariate': 'SELF0000074',
    'TiDE': 'SELF0000075',
    'DeepNPTS': 'SELF0000076'
}

# Request body for pushing prediction results to eta; the IndexCode and
# DataList fields are filled in per model when the request is made
data = {
    "IndexCode": "",
    "IndexName": "价格预测模型",
    "Unit": "无",
    "Frequency": "日度",
    "SourceName": "价格预测",
    "Remark": 'ddd',
    "DataList": [
        {
            "Date": "2024-05-02",
            "Value": 333444
        }
    ]
}

# eta classification
# Data can only be fetched at level 3, so every level-3 class under
# 能源化工 (energy & chemicals) has to be collected by hand
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
# ParentId: 1160 is 能源化工 (energy & chemicals)
# ClassifyId: 1214 is 原油 (crude oil); 1161 is PP
# ParentId 1214 returns all data under crude oil.
ClassifyId = 1161


### Report upload configuration
# Variables -- production environment
login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login"
upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList"


login_data = {
    "data": {
        "account": "api_dev",
        "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=",
        "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
        "terminal": "API"
    },
    "funcModule": "API",
    "funcOperation": "获取token"
}

upload_data = {
    "funcModule": '研究报告信息',
    "funcOperation": '上传原油价格预测报告',
    "data": {
        "ownerAccount": '27663',  # account that owns the report: 27663 - 刘小朋
        "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed to OIL_PRICE_FORECAST
        "fileName": '',  # file name
        "fileBase64": '',  # file content, base64-encoded
        "categoryNo": 'yyjgycbg',  # research-report category code
        "smartBusinessClassCode": 'YCJGYCBG',  # analysis-report category code
        "reportEmployeeCode": "E40482",  # reporter: E40482 - administrator, 0000027663 - 刘小朋
        "reportDeptCode": "002000621000",  # reporting department: 002000621000, SH futures research
        "productGroupCode": "RAW_MATERIAL"  # commodity group
    }
}


# # Variables -- test environment
# login_pushreport_url = "http://192.168.100.53:8080/jingbo-dev/api/server/login"
# upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
# # upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave"  # zhaoqiwei

# login_data = {
#     "data": {
#         "account": "api_test",
#         "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=",
#         "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
#         "terminal": "API"
#     },
#     "funcModule": "API",
#     "funcOperation": "获取token"
# }

# upload_data = {
#     "funcModule": '研究报告信息',
#     "funcOperation": '上传原油价格预测报告',
#     "data": {
#         "ownerAccount": 'arui',  # account that owns the report
#         "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed to OIL_PRICE_FORECAST
#         "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf',  # file name
#         "fileBase64": '',  # file content, base64-encoded
#         "categoryNo": 'yyjgycbg',  # research-report category code
#         "smartBusinessClassCode": 'YCJGYCBG',  # analysis-report category code
#         "reportEmployeeCode": "E40116",  # reporter
#         "reportDeptCode": "D0044",  # reporting department
#         "productGroupCode": "RAW_MATERIAL"  # commodity group
#     }
# }


### Production switches
# is_train = True              # whether to train
# is_debug = False             # whether to run in debug mode
# is_eta = True                # whether to use the eta API
# is_timefurture = True        # whether to add time features
# is_fivemodels = False        # whether to reuse the five best saved models
# is_edbcode = False           # use the features from edbcodelist
# is_edbnamelist = False       # custom features, matching edbnamelist above
# is_update_report = True      # whether to upload the report


### Switches
is_train = True              # whether to train
is_debug = False             # whether to run in debug mode
is_eta = True                # whether to use the eta API
is_timefurture = True        # whether to add time features
is_fivemodels = False        # whether to reuse the five best saved models
is_edbcode = False           # use the features from edbcodelist
is_edbnamelist = False       # custom features, matching edbnamelist above
is_update_eta = False        # push prediction results to eta
is_update_report = False     # whether to upload the report

# Data cutoff date
end_time = ''                # data cutoff date
delweekenday = True
is_corr = False              # whether features go through lead/lag correlation boosting
add_kdj = False              # whether to add the KDJ indicator
if add_kdj and is_edbnamelist:
    edbnamelist = edbnamelist + ['K', 'D', 'J']

### Model parameters
y = 'PP:拉丝:1102K:市场价:青州:国家能源宁煤(日)'  # target variable of the indicator data
# y = '期货结算价(连续):布伦特原油:前一个观测值'  # target variable for ineoil
horizon = 5          # forecast horizon
input_size = 40      # input sequence length
train_steps = 50 if is_debug else 1000  # training steps, used to cap the epoch count
val_check_steps = 30           # evaluation frequency
early_stop_patience_steps = 5  # early-stopping patience

# --- cross-validation parameters
test_size = 200       # test-set size; defined here, reassigned later where used
val_size = test_size  # validation-set size, same as the test set

### Feature-selection parameters
k = 100  # number of features to keep; 0 or more than the feature count means all features


### Files
data_set = 'PP指标数据.xlsx'  # dataset file
# data_set = 'INE_OIL(1).csv'
### Folders
dataset = 'dataset'  # dataset folder

# Database name
db_name = os.path.join(dataset, 'jbsh_juxiting.db')


settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'
# Current date and time
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
reportname = f'PP--{now}-预测报告.pdf'  # report file name
reportname = reportname.replace(':', '-')  # replace colons

### Email configuration
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
# recv = ['liurui_test@163.com', '52585119@qq.com']
recv = ['liurui_test@163.com']
title = reportname
content = y + '预测报告请看附件'
file = os.path.join(dataset, reportname)
# file = os.path.join(dataset, '14-7-50--100-原油指标数据.xlsx-Brent连1合约价格--20240731175936-预测报告.pdf')
ssl = True


### Logging configuration

# Create the log directory if it does not exist
log_dir = 'logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# Configure the logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.INFO)

# File handler: write logs to a rotating file
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

# Console handler: print logs to the console
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(message)s'))

# Attach the handlers to the logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# logger.info('当前配置:' + settings)
106
config_tansuanli.py
Normal file
@@ -0,0 +1,106 @@
import datetime
import logging
import os
import logging.handlers


# eta API token
APPID = "XNLDvxZHHugj7wJ7"
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"

# eta API URLs
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
edbcodelist = ['CO1 Comdty', 'ovx index', 'C2404194834', 'C2404199738', 'dxy curncy', 'C2403128043', 'C2403150124',
               'DOESCRUD Index', 'WTRBM1 EEGC Index', 'FVHCM1 INDEX', 'doedtprd index', 'CFFDQMMN INDEX',
               'C2403083739', 'C2404167878', 'C2403250571', 'ovx index', 'lmcads03 lme comdty', 'GC1 COMB Comdty',
               'C2404171822']

# Hard-coded column list for now, matching edbcodelist above; to be revised later
edbnamelist = [
    'ds', 'y',
    'Brent c1-c6', 'Brent c1-c3', 'Brent-WTI', '美国商业原油库存',
    'DFL', '美国汽油裂解价差', 'ovx index', 'dxy curncy', 'lmcads03 lme comdty',
    'C2403128043', 'C2403150124', 'FVHCM1 INDEX', 'doedtprd index', 'CFFDQMMN INDEX',
    'C2403083739', 'C2404167878',
    # 'ovx index',
    'GC1 COMB Comdty'
]

# eta classification
# Data can only be fetched at level 3, so every level-3 class under
# 能源化工 (energy & chemicals) has to be collected by hand
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
# ParentId: 1160 is 能源化工 (energy & chemicals)
# ClassifyId: 1214 is 原油 (crude oil)
# ParentId 1214 returns all data under crude oil.
ClassifyId = 1214

### Switches
is_train = True         # whether to train
is_debug = True         # whether to run in debug mode
is_eta = False          # whether to use the eta API
is_timefurture = False  # whether to add time features
is_fivemodels = False   # whether to reuse the five best saved models
is_edbcode = False      # use the features from edbcodelist
is_edbnamelist = False  # use the explicit edbnamelist; affects feature selection, only relevant for the crude-oil dataset

# Data cutoff date
end_time = '2024-07-30'  # data cutoff date
delweekenday = True      # whether to drop weekend rows

### Model parameters
y = '电碳价格'   # target variable of the indicator data
horizon = 5      # forecast horizon
input_size = 10  # input sequence length
train_steps = 10 if is_debug else 1000  # training steps, used to cap the epoch count
val_check_steps = 30           # evaluation frequency
early_stop_patience_steps = 5  # early-stopping patience
### --- cross-validation parameters
test_size = 100       # test-set size; defined here, reassigned later where used
val_size = test_size  # validation-set size, same as the test set

### --- Feature-selection parameters
k = 100  # number of features to keep; 0 or more than the feature count means all features

### --- Files
data_set = '碳酸锂合并数据.csv'  # dataset file
### --- Folders
dataset = 'dataset'  # dataset folder
settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')  # current date and time
reportname = f'{settings}--{now}-预测报告.pdf'  # report file name
reportname = reportname.replace(':', '-')  # replace colons

### Email configuration
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
# recv = ['liurui_test@163.com', '52585119@qq.com']
recv = ['liurui_test@163.com']
title = reportname  # the original quoted the variable name ('reportname'), which sent a literal string
content = 'brent价格预测报告请看附件'
file = os.path.join(dataset, reportname)  # same quoting bug fixed here
# file = os.path.join(dataset, '14-7-50--100-原油指标数据.xlsx-Brent连1合约价格--20240731175936-预测报告.pdf')
ssl = True


### --- Logging configuration
# Create the log directory if it does not exist
log_dir = 'logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
# Configure the logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.INFO)
# File handler: write logs to a rotating file
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
# Console handler: print logs to the console
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(message)s'))
# Attach the handlers to the logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)
logger.info('当前配置:' + settings)
0
lib/__init__.py
Normal file
1391
lib/dataread.py
Normal file
File diff suppressed because it is too large
191
lib/duojinchengpredict.py
Normal file
@@ -0,0 +1,191 @@
import os
import re
import time

import joblib
import pandas as pd  # duplicate pandas import removed
import torch

import multiprocessing


# Helper functions
def loadcsv(filename):
    # Try utf-8 first, fall back to gbk
    try:
        df = pd.read_csv(filename, encoding='utf-8')
    except UnicodeDecodeError:
        df = pd.read_csv(filename, encoding='gbk')
    return df


def datachuli(df, datecol='date'):
    # Drop all-empty columns
    df = df.dropna(axis=1, how='all')
    # Forward fill, then backward fill (the original called df.ffill / df.bfill
    # without parentheses, which was a no-op)
    df = df.ffill().bfill()
    # Rename the date column to ds
    df.rename(columns={datecol: 'ds'}, inplace=True)
    # Parse ds as pd.datetime
    df['ds'] = pd.to_datetime(df['ds'])
    # Rename the prediction column
    df.rename(columns={'Brent连1合约价格': 'y'}, inplace=True)

    return df


def getdata(filename, datecol='date'):
    df = loadcsv(filename)
    df = datachuli(df, datecol)
    return df


# Prediction function
def predict(X_test, nf, result_list):
    df_predict = nf.predict(X_test).reset_index()
    result_list.append(df_predict.values.tolist())
    return df_predict


def testSetPredict(X_test, nf, columns, dataset):
    # Record the start time
    start_time = time.time()

    # Number of samples handled per process
    num_samples = len(X_test)
    num_processes = multiprocessing.cpu_count()
    samples_per_process = num_samples // num_processes

    manager = multiprocessing.Manager()
    result_list = manager.list()  # shared list across processes
    # Create the process pool
    with multiprocessing.Pool(num_processes) as pool:
        processes = []
        for i in range(num_processes):
            # Index range this process is responsible for
            start_index = i * samples_per_process
            end_index = (i + 1) * samples_per_process if i != num_processes - 1 else num_samples
            # Slice the data by the computed indices
            X_test_split = X_test[start_index:end_index]
            # Submit the tasks to the pool
            for X in X_test_split:
                processes.append(pool.apply_async(predict, args=(X, nf, result_list)))
        for process in processes:
            process.get()
    # Convert the shared list back into DataFrames
    df_combined = pd.DataFrame()
    df_combined2 = pd.DataFrame()
    for result in result_list:
        try:
            df_shared = pd.DataFrame(result, columns=['index', 'ds'] + columns)
            df_combined = pd.concat([df_combined, df_shared]).reset_index(drop=True)
        except ValueError:
            # Shape mismatch: a DeepAR result carries extra quantile columns,
            # so it goes into the second DataFrame
            df_shared2 = pd.DataFrame(result, columns=['index', 'ds'] + columns2)
            df_combined2 = pd.concat([df_combined2, df_shared2]).reset_index(drop=True)
    # df_combined.drop(['index'], axis=1, inplace=True)
    df_combined.to_csv(os.path.join(dataset, 'df_combined.csv'), index=False)
    # df_combined2.drop(['index'], axis=1, inplace=True)
    df_combined2.to_csv('df_combined.csv', index=False)
    end_time = time.time()
    # Print the elapsed time in seconds
    print("运行时间:", end_time - start_time, "秒")
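
# Illustrative note (added): with num_samples=10 and num_processes=3,
# samples_per_process is 3 and the slices are [0:3], [3:6], [6:10]; the last
# process also takes the remainder, so every sample is submitted exactly once.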

if __name__ == '__main__':
    # Record the start time
    start_time = time.time()

    # file = '指标数据处理.csv'
    file = 'brentpricepredict.csv'
    df = getdata(file)
    df.head()

    # Split features and label
    X = df.drop(['y', 'ds'], axis=1)  # features, excluding the timestamp and the label column (Brent连1合约价格)
    y = df['y']  # label

    # End index of the training set: 80% of the data
    split_index = int(0.8 * df.shape[0])

    # Split train/test in time order (.copy() avoids SettingWithCopy warnings below)
    df_train = df[:split_index].copy()
    df_test = df[split_index:].copy()
    df_train['unique_id'] = 1
    df_test['unique_id'] = 1

    df_combined = pd.DataFrame()
    df_test = df_test.reindex()
    # df_test = df_test[-20:]

    # Model list, used for the prediction result column names
    columns = [
        'NHITS',
        'Informer',
        'LSTM',
        'iTransformer',
        'TSMixer',
        'TSMixerx',
        'PatchTST',
        'RNN',
        'GRU',
        'TCN',
        'DeepAR',
        'BiTCN',
        'DilatedRNN',
        'MLP',
        'DLinear',
        'NLinear',
        'TFT',
        'FEDformer',
        'StemGNN',
        'MLPMultivariate',
        'TiDE',
        'DeepNPTS',
    ]

    # DeepAR predictions carry five extra quantile columns, handled separately
    columns2 = [
        'NHITS',
        'Informer',
        'LSTM',
        'iTransformer',
        'TSMixer',
        'TSMixerx',
        'PatchTST',
        'RNN',
        'GRU',
        'TCN',
        'DeepAR',
        'DeepAR-median',
        'DeepAR-lo-90',
        'DeepAR-lo-80',
        'DeepAR-hi-80',
        'DeepAR-hi-90',
        'BiTCN',
        'DilatedRNN',
        'MLP',
        'DLinear',
        'NLinear',
        'TFT',
        'FEDformer',
        'StemGNN',
        'MLPMultivariate',
        'TiDE',
        'DeepNPT',
    ]

    input_size = 14
    X_test = []
    for i in range(0, len(df_test) - input_size + 1):
        X_test.append(df_test.iloc[i:i + input_size])

    nf = joblib.load('model_reg.joblib')

    # The original call omitted the dataset argument, which raises TypeError;
    # 'dataset' follows the output-folder convention used in the config files
    testSetPredict(X_test, nf, columns, dataset='dataset')
448
lib/tools.py
Normal file
@@ -0,0 +1,448 @@
import time
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from config_jingbo import logger
from sklearn import metrics
from scipy.stats import spearmanr  # used by plot_corr; missing from the original imports
import random, string, base64, hmac, hashlib
from reportlab.pdfbase import pdfmetrics  # font registration
from reportlab.pdfbase.ttfonts import TTFont  # font class
from reportlab.platypus import Table, SimpleDocTemplate, Paragraph, Image  # report content classes
from reportlab.lib.pagesizes import letter  # page size (8.5*inch, 11*inch)
from reportlab.lib.styles import getSampleStyleSheet  # text styles
from reportlab.lib import colors  # color module
from reportlab.graphics.charts.barcharts import VerticalBarChart  # chart class
from reportlab.graphics.charts.legends import Legend  # legend class
from reportlab.graphics.shapes import Drawing  # drawing tools
from reportlab.lib.units import cm  # unit: cm
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import sqlite3
import tkinter as tk
from tkinter import messagebox


def timeit(func):
    '''Timing decorator'''
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        execution_time = end_time - start_time
        logger.info(f"{func.__name__} 函数的执行时间为: {execution_time} 秒")
        return result
    return wrapper
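
# Example usage (illustrative, kept commented out so importing this module
# has no side effects):
# @timeit
# def train_once():
#     time.sleep(1)
# train_once()  # logs: "train_once 函数的执行时间为: 1.0... 秒"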

class BinanceAPI:
    '''
    Builds the request-header signature for the Binance-style API
    '''
    def __init__(self, APPID, SECRET):
        self.APPID = APPID
        self.SECRET = SECRET
        self.get_signature()

    # Generate a random string to use as the nonce
    def generate_nonce(self, length=32):
        self.nonce = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
        return self.nonce

    # Current timestamp in seconds
    def get_timestamp(self):
        return int(time.time())

    # Build the string to be signed (the original line was garbled to
    # '×tamp' by an HTML-entity mishap; it is '&timestamp')
    def build_sign_str(self):
        return f'appid={self.APPID}&nonce={self.nonce}&timestamp={self.timestamp}'

    # Compute the signature with HMAC SHA-256
    def calculate_signature(self, secret, message):
        return base64.urlsafe_b64encode(hmac.new(secret.encode('utf-8'), message.encode('utf-8'), hashlib.sha256).digest()).decode('utf-8')

    def get_signature(self):
        # Run the steps above to produce the signature
        self.nonce = self.generate_nonce()
        self.timestamp = self.get_timestamp()
        self.sign_str = self.build_sign_str()
        self.signature = self.calculate_signature(self.SECRET, self.sign_str)
        # return self.signature
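
# Example usage (illustrative): main.py builds the signature exactly like
# this. How the fields are attached to request headers is handled in
# lib/dataread.py, so the header names below are an assumption.
# signature = BinanceAPI(APPID, SECRET)
# headers = {'appid': signature.APPID, 'nonce': signature.nonce,
#            'timestamp': str(signature.timestamp), 'signature': signature.signature}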


class Graphs:
    '''
    PDF report builder
    '''
    # Draw the title
    @staticmethod
    def draw_title(title: str):
        # Get the full stylesheet
        style = getSampleStyleSheet()
        # Take the heading style
        ct = style['Heading1']
        # Tune individual style attributes
        ct.fontName = 'SimSun'       # font name
        ct.fontSize = 18             # font size
        ct.leading = 50              # line spacing
        ct.textColor = colors.green  # font color
        ct.alignment = 1             # centered
        ct.bold = True
        # Build and return the title paragraph
        return Paragraph(title, ct)

    # Draw a subtitle
    @staticmethod
    def draw_little_title(title: str):
        # Get the full stylesheet
        style = getSampleStyleSheet()
        # Take the normal style
        ct = style['Normal']
        # Tune individual style attributes
        ct.fontName = 'SimSun'     # font name
        ct.fontSize = 15           # font size
        ct.leading = 30            # line spacing
        ct.textColor = colors.red  # font color
        # Build and return the subtitle paragraph
        return Paragraph(title, ct)

    # Draw a normal paragraph
    @staticmethod
    def draw_text(text: str):
        # Get the full stylesheet
        style = getSampleStyleSheet()
        # Take the normal style
        ct = style['Normal']
        ct.fontName = 'SimSun'
        ct.fontSize = 12
        ct.wordWrap = 'CJK'      # automatic line wrapping for CJK text
        ct.alignment = 0         # left-aligned
        ct.firstLineIndent = 32  # first-line indent
        ct.leading = 25
        return Paragraph(text, ct)

    # Draw a table
    @staticmethod
    def draw_table(col_width, *args):
        style = [
            ('FONTNAME', (0, 0), (-1, -1), 'SimSun'),  # font
            ('FONTSIZE', (0, 0), (-1, 0), 10),  # font size of the first row
            ('FONTSIZE', (0, 1), (-1, -1), 8),  # font size of the remaining rows
            ('BACKGROUND', (0, 0), (-1, 0), '#d5dae6'),  # background color of the first row
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),  # center the first row
            ('ALIGN', (0, 1), (-1, -1), 'LEFT'),  # left-align the remaining rows
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),  # vertically center all cells
            ('TEXTCOLOR', (0, 0), (-1, -1), colors.darkslategray),  # cell text color
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),  # grey grid lines, 0.5 wide
            # ('SPAN', (0, 1), (0, 2)),  # merge rows 2-3 of the first column
            # ('SPAN', (0, 3), (0, 4)),  # merge rows 4-5 of the first column
            # ('SPAN', (0, 5), (0, 6)),  # merge rows 6-7 of the first column
            # ('SPAN', (0, 7), (0, 8)),  # merge rows 8-9 of the first column
        ]
        table = Table(args, colWidths=col_width, style=style)
        return table

    # Build a bar chart
    @staticmethod
    def draw_bar(bar_data: list, ax: list, items: list):
        drawing = Drawing(500, 250)
        bc = VerticalBarChart()
        bc.x = 45        # x coordinate of the chart
        bc.y = 45        # y coordinate of the chart
        bc.height = 200  # chart height
        bc.width = 350   # chart width
        bc.data = bar_data
        bc.strokeColor = colors.black  # color of the top and right axes
        bc.valueAxis.valueMin = 5000   # y-axis minimum
        bc.valueAxis.valueMax = 26000  # y-axis maximum
        bc.valueAxis.valueStep = 2000  # y-axis step
        bc.categoryAxis.labels.dx = 2
        bc.categoryAxis.labels.dy = -8
        bc.categoryAxis.labels.angle = 20
        bc.categoryAxis.categoryNames = ax

        # Legend
        leg = Legend()
        leg.fontName = 'SimSun'
        leg.alignment = 'right'
        leg.boxAnchor = 'ne'
        leg.x = 475  # x coordinate of the legend
        leg.y = 240
        leg.dxTextSpace = 10
        leg.columnMaximum = 3
        leg.colorNamePairs = items
        drawing.add(leg)
        drawing.add(bc)
        return drawing

    # Draw an image
    @staticmethod
    def draw_img(path):
        img = Image(path)        # read the image at the given path
        img.drawWidth = 20*cm    # image width
        img.drawHeight = 10*cm   # image height
        return img


# The evaluation metrics do not all live in one library; the formulas used are collected here

# MSE
def mse(y_true, y_pred):
    res_mse = metrics.mean_squared_error(y_true, y_pred)
    return res_mse

# RMSE
def rmse(y_true, y_pred):
    res_rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
    return res_rmse

# MAE
def mae(y_true, y_pred):
    res_mae = metrics.mean_absolute_error(y_true, y_pred)
    return res_mae

# MAPE and SMAPE are implemented directly from their formulas below
# MAPE
def mape(y_true, y_pred):
    res_mape = np.mean(np.abs((y_pred - y_true) / y_true)) * 100
    return res_mape

# SMAPE
def smape(y_true, y_pred):
    res_smape = 2.0 * np.mean(np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true))) * 100
    return res_smape
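
# Quick sanity check (illustrative):
# mape(np.array([100, 200]), np.array([110, 190]))
# -> mean(0.10, 0.05) * 100 = 7.5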

# Correlation plotting
def plot_corr(data, size=11):
    # Drop the ds column
    data.drop(columns=['ds'], inplace=True)

    # Empty DataFrame to collect the correlations
    correlation_df = pd.DataFrame(columns=['Feature', 'Correlation'])

    # Compute each feature's Pearson and Spearman correlation with the target column
    for col in data.columns:
        if col != 'y':
            pearson_correlation = np.corrcoef(data[col], data['y'])[0, 1]
            spearman_correlation, _ = spearmanr(data[col], data['y'])
            new_row = {'Feature': col, 'Pearson_Correlation': round(pearson_correlation, 3), 'Spearman_Correlation': round(spearman_correlation, 2)}
            correlation_df = correlation_df._append(new_row, ignore_index=True)
    # Drop the empty column
    correlation_df.drop('Correlation', axis=1, inplace=True)
    correlation_df.dropna(inplace=True)
    correlation_df.to_csv('指标相关性分析.csv', index=False)

    # Keep as ndarray; the original .tolist() broke the elementwise comparisons below
    data = correlation_df['Pearson_Correlation'].values
    # 20 bins from -1 to 1
    bins = np.linspace(-1, 1, 21)
    # Count of values falling in each bin
    hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]

    # Canvas size
    plt.figure(figsize=(10, 6))
    # Histogram
    plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))

    # Title and axis labels
    plt.title('皮尔逊相关系数分布图')
    plt.xlabel('区间')
    plt.ylabel('统计数')
    plt.savefig('皮尔逊相关性系数.png')
    plt.close()


    # Canvas size
    plt.figure(figsize=(10, 6))
    data = correlation_df['Spearman_Correlation'].values  # ndarray, as above
    # Count of values falling in each bin
    hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]

    # Histogram
    plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))

    # Title and axis labels
    plt.title('斯皮尔曼相关系数分布图')
    plt.xlabel('区间')
    plt.ylabel('统计数')
    plt.savefig('斯皮尔曼相关性系数.png')
    plt.close()


# Email wrapper
class SendMail(object):
    def __init__(self, username, passwd, recv, title, content,
                 file=None, ssl=False,
                 email_host='smtp.qq.com', port=25, ssl_port=465):
        '''
        :param username: user name
        :param passwd: password
        :param recv: recipients; pass a list for several, e.g. ['a@qq.com','b@qq.com']
        :param title: mail subject
        :param content: mail body
        :param file: attachment path; use an absolute path if it is not in the current directory; no attachment by default
        :param ssl: whether to use a secure connection; plain by default
        :param email_host: smtp server address (defaults to smtp.qq.com)
        :param port: port for plain connections, 25 by default
        :param ssl_port: port for secure connections, 465 by default
        '''
        self.username = username      # user name
        self.passwd = passwd          # password
        self.recv = recv              # recipients; pass a list for several
        self.title = title            # mail subject
        self.content = content        # mail body
        self.file = file              # attachment path; absolute if not in the current directory
        self.email_host = email_host  # smtp server address
        self.port = port              # plain port
        self.ssl = ssl                # whether to use a secure connection
        self.ssl_port = ssl_port      # secure port

    def send_mail(self):
        msg = MIMEMultipart()
        # Object carrying the content to send
        if self.file:  # attachment handling
            file_name = os.path.split(self.file)[-1]  # file name only, without the path
            try:
                f = open(self.file, 'rb').read()
            except Exception as e:
                raise Exception('附件打不开!!!!')
            else:
                att = MIMEText(f, "base64", "utf-8")
                att["Content-Type"] = 'application/octet-stream'
                # base64.b64encode(file_name.encode()).decode()
                new_file_name = '=?utf-8?b?' + base64.b64encode(file_name.encode()).decode() + '?='
                # Required encoding for attachments with Chinese file names
                att["Content-Disposition"] = 'attachment; filename="%s"' % (new_file_name)
                msg.attach(att)
        msg.attach(MIMEText(self.content))  # mail body
        msg['Subject'] = self.title         # mail subject
        msg['From'] = self.username         # sender account
        msg['To'] = ','.join(self.recv)     # recipient account list
        if self.ssl:
            self.smtp = smtplib.SMTP_SSL(self.email_host, port=self.ssl_port)
        else:
            self.smtp = smtplib.SMTP(self.email_host, port=self.port)
        # SMTP server object
        self.smtp.login(self.username, self.passwd)
        try:
            self.smtp.sendmail(self.username, self.recv, msg.as_string())
        except Exception as e:
            print('出错了。。', e)
            # the original passed e as a stray positional arg; use %s formatting
            logger.info('邮件服务出错了: %s', e)
        else:
            print('发送成功!')
            logger.info('邮件发送成功!')
        self.smtp.quit()


def dateConvert(df, datecol='ds'):
    # Parse the date column as datetime, trying both common formats
    try:
        df[datecol] = pd.to_datetime(df[datecol], format=r'%Y-%m-%d')
    except ValueError:
        df[datecol] = pd.to_datetime(df[datecol], format=r'%Y/%m/%d')
    return df

class SQLiteHandler:
    def __init__(self, db_name):
        self.db_name = db_name
        self.connection = None
        self.cursor = None

    def connect(self):
        self.connection = sqlite3.connect(self.db_name)
        self.cursor = self.connection.cursor()

    def close(self):
        if self.connection:
            self.connection.close()
            self.connection = None
            self.cursor = None

    def execute_query(self, query, params=None):
        if params:
            return self.cursor.execute(query, params)
        else:
            return self.cursor.execute(query)

    def commit(self):
        self.connection.commit()

    def create_table(self, table_name, columns):
        query = f"CREATE TABLE IF NOT EXISTS {table_name} ({columns})"
        self.execute_query(query)
        self.commit()

    def insert_data(self, table_name, values, columns=None):
        placeholders = ', '.join(['?'] * len(values))
        if columns:
            query = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
        else:
            query = f"INSERT INTO {table_name} VALUES ({placeholders})"
        self.execute_query(query, values)
        self.commit()

    def select_data(self, table_name, columns=None, where_condition=None, order_by=None, limit=None):
        query = f"SELECT {', '.join(columns) if columns else '*'} FROM {table_name}"
        if where_condition:
            query += f" WHERE {where_condition}"
        if order_by:
            query += f" ORDER BY {order_by}"
        if limit:
            query += f" LIMIT {limit}"
        results = self.execute_query(query).fetchall()
        if results:
            headers = [description[0] for description in self.execute_query(query).description]
            return pd.DataFrame(results, columns=headers)
        else:
            return pd.DataFrame()

    def update_data(self, table_name, set_values, where_condition):
        query = f"UPDATE {table_name} SET {set_values} WHERE {where_condition}"
        logger.info('更新数据sql' + query)
        self.execute_query(query)
        self.commit()

    def delete_data(self, table_name, where_condition):
        query = f"DELETE FROM {table_name} WHERE {where_condition}"
        self.execute_query(query)
        self.commit()

    def check_table_exists(self, table_name):
        query = f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}'"
        result = self.execute_query(query).fetchone()
        return result is not None

    def add_column_if_not_exists(self, table_name, column_name, column_type):
        # Inspect the table schema
        query = f"PRAGMA table_info({table_name})"
        self.execute_query(query)
        columns = [column[1] for column in self.cursor.fetchall()]

        # Check whether the column exists
        if column_name not in columns:
            # Add it if it does not
            query = f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
            self.execute_query(query)
            self.commit()
            print(f"Column '{column_name}' added to table '{table_name}' successfully.")
        else:
            print(f"Column '{column_name}' already exists in table '{table_name}'.")
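
# Example usage (illustrative, mirroring how main.py uses the class):
# db = SQLiteHandler(db_name='dataset/jbsh_yuanyou.db')
# db.connect()
# db.create_table('most_model', columns="ds datetime, most_common_model TEXT")
# db.insert_data('most_model', ('2024-09-06 15:01:29', 'NHITS'), columns=('ds', 'most_common_model'))
# print(db.select_data('most_model', order_by="ds DESC", limit="5"))
# db.close()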


if __name__ == '__main__':
    print('This is a tool, not a script.')
176
main.py
Normal file
@@ -0,0 +1,176 @@
# Load the configuration
# from config_jingbo import *
# from config_tansuanli import *
from config_juxiting import *
from lib.dataread import *
from lib.tools import *
from models.nerulforcastmodels import ex_Model, model_losss, brent_export_pdf, tansuanli_export_pdf, pp_export_pdf

import glob
import torch
torch.set_float32_matmul_precision("high")

sqlitedb = SQLiteHandler(db_name)
sqlitedb.connect()

def predict_main():
    signature = BinanceAPI(APPID, SECRET)
    etadata = EtaReader(signature=signature,
                        classifylisturl=classifylisturl,
                        classifyidlisturl=classifyidlisturl,
                        edbcodedataurl=edbcodedataurl,
                        edbcodelist=edbcodelist,
                        edbdatapushurl=edbdatapushurl,
                        edbdeleteurl=edbdeleteurl,
                        edbbusinessurl=edbbusinessurl
                        )
    # Fetch the data
    if is_eta:
        # eta data
        logger.info('从eta获取数据...')
        signature = BinanceAPI(APPID, SECRET)
        etadata = EtaReader(signature=signature,
                            classifylisturl=classifylisturl,
                            classifyidlisturl=classifyidlisturl,
                            edbcodedataurl=edbcodedataurl,
                            edbcodelist=edbcodelist,
                            edbdatapushurl=edbdatapushurl,
                            edbdeleteurl=edbdeleteurl,
                            edbbusinessurl=edbbusinessurl,
                            )

        # df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set, dataset=dataset)  # raw, unprocessed data
        df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set, dataset=dataset)  # raw, unprocessed data

        # Data preprocessing
        df = datachuli(df_zhibiaoshuju, df_zhibiaoliebiao, y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, end_time=end_time)

    else:
        logger.info('读取本地数据:' + os.path.join(dataset, data_set))
        df = getdata(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, end_time=end_time)  # raw, unprocessed data

    # Rename the prediction column
    df.rename(columns={y: 'y'}, inplace=True)

    if is_edbnamelist:
        df = df[edbnamelist]
    df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False)
    # Save the latest true y value to the database
    # Take the last row of the frame
    first_row = df[['ds', 'y']].tail(1)
    # Write the latest true value to the database
    if not sqlitedb.check_table_exists('trueandpredict'):
        first_row.to_sql('trueandpredict', sqlitedb.connection, index=False)
    else:
        for row in first_row.itertuples(index=False):
            row_dict = row._asdict()
            check_query = sqlitedb.select_data('trueandpredict', where_condition=f"ds = '{row.ds}'")
            if len(check_query) > 0:
                set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
                sqlitedb.update_data('trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'")
                continue
            sqlitedb.insert_data('trueandpredict', tuple(row_dict.values()), columns=row_dict.keys())

    import datetime
    # Check whether today is Thursday (weekday() == 3; the original comment said Monday)
    is_weekday = datetime.datetime.now().weekday() == 3
    if is_weekday:
        logger.info('今天是周四,更新预测模型')
        # Find the model with the lowest prediction residual over the last 20 days

        model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="20")
        model_results = model_results.dropna()
        modelnames = model_results.columns.to_list()[2:]
        for col in model_results[modelnames].select_dtypes(include=['object']).columns:
            model_results[col] = model_results[col].astype(np.float32)
        # Deviation rate between each prediction and the true value
        for model in modelnames:
            model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
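        # e.g. y = 100, prediction = 110 -> abs error rate = |100 - 110| / 100 = 0.10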

        # Minimum deviation rate in each row
        min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
        # Column name holding each row's minimum deviation rate
        min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
        # Strip the suffix to recover the model name
        min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
        # Take the most frequent model name
        most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
        logger.info(f"最近20天预测残差最低的模型名称:{most_common_model}")

        # Save the result to the database
        if not sqlitedb.check_table_exists('most_model'):
            sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT")
        sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))

    if is_corr:
        df = corr_feature(df=df)

    df1 = df.copy()  # backup; ds and y are added back after feature selection
    logger.info(f"开始训练模型...")
    row, col = df.shape

    now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    # ex_Model(df,
    #          horizon=horizon,
    #          input_size=input_size,
    #          train_steps=train_steps,
    #          val_check_steps=val_check_steps,
    #          early_stop_patience_steps=early_stop_patience_steps,
    #          is_debug=is_debug,
    #          dataset=dataset,
    #          is_train=is_train,
    #          is_fivemodels=is_fivemodels,
    #          val_size=val_size,
    #          test_size=test_size,
    #          settings=settings,
    #          now=now,
    #          etadata=etadata,
    #          modelsindex=modelsindex,
    #          data=data,
    #          is_eta=is_eta,
    #          )

    # Model evaluation
    model_results3 = model_losss(sqlitedb)
    # Model report

    title = f'{settings}--{now}-预测报告'  # report title
    brent_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
                     reportname=reportname, sqlitedb=sqlitedb)
    # pp_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
    #               reportname=reportname)
    logger.info('模型训练完成')

    # tansuanli_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, end_time=end_time, reportname=reportname)

    # # LSTM univariate model
    # ex_Lstm(df, input_seq_len=input_size, output_seq_len=horizon, is_debug=is_debug, dataset=dataset)

    # # LSTM multivariate model
    # ex_Lstm_M(df, n_days=input_size, out_days=horizon, is_debug=is_debug, datasetpath=dataset)

    # # GRU model
    # # ex_GRU(df)

    # Send the email
    m = SendMail(
        username=username,
        passwd=passwd,
        recv=recv,
        title=title,
        content=content,
        file=max(glob.glob(os.path.join(dataset, '*.pdf')), key=os.path.getctime),
        ssl=ssl,
    )
    # m.send_mail()


if __name__ == '__main__':
    predict_main()
123
maincanshu.py
Normal file
@@ -0,0 +1,123 @@
# Load the configuration
from config_jingbo import *
from lib.tools import *
from lib.dataread import *
from models.nerulforcastmodels import ex_Model, model_losss, brent_export_pdf
from models.lstmmodels import ex_Lstm_M, ex_Lstm
from models.grumodels import ex_GRU
import glob
import torch
torch.set_float32_matmul_precision("high")

if __name__ == '__main__':
    signature = BinanceAPI(APPID, SECRET)

    # Train models over a grid of parameters
    input_size_list = [14]
    horizon_list = [7]
    train_steps_list = [500, 1000, 1500, 2000]
    k_list = [10, 18, 25, 50, 100]
    end_time_list = ['2024-07-03']
    is_debug = False
    is_fivemodels = False  # whether to reuse the five best saved models
    delweekenday = True
    # Iterate over every combination of the parameters above
    for i in range(len(input_size_list)):
        for h in range(len(horizon_list)):
            for j in range(len(train_steps_list)):
                for k in range(len(k_list)):
                    for end_time in end_time_list:
                        input_size = input_size_list[i]
                        horizon = horizon_list[h]  # the original assigned `horizons`, which ex_Model below never reads
                        train_steps = train_steps_list[j]
                        K = k_list[k]
                        settings = f'{input_size}-{horizon_list[h]}-{train_steps}-{K}-{data_set}-{end_time}-{y}'
                        logger.info(f'当前配置:{settings}')

                        # Fetch the data
                        if is_eta:
                            etadata = EtaReader(signature=signature,
                                                classifylisturl=classifylisturl,
                                                classifyidlisturl=classifyidlisturl,
                                                edbcodedataurl=edbcodedataurl,
                                                edbcodelist=edbcodelist
                                                )
                            df = etadata.get_eta_api_data(data_set=data_set, dataset=dataset)  # raw, unprocessed data
                        else:
                            filename = os.path.join(dataset, data_set)
                            logger.info(f'未启用Eta数据,将读取本地数据{filename}')
                            df = pd.read_excel(filename, sheet_name='指标数据')

                        # Data preprocessing
                        df = datachuli(df=df, dataset=dataset, end_time=end_time, y=y, delweekenday=delweekenday)

                        if is_timefurture:
                            df = addtimecharacteristics(df=df, dataset=dataset)

                        # Rename the prediction column
                        df.rename(columns={y: 'y'}, inplace=True)

                        logger.info(f"开始训练模型...")
                        row, col = df.shape
                        logger.info(f'当前配置:{settings}')
                        # Current date and time, %Y-%m-%d-%H-%M-%S
                        from datetime import datetime
                        now = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
                        ex_Model(df,
                                 horizon=horizon,
                                 input_size=input_size,
                                 train_steps=train_steps,
                                 val_check_steps=val_check_steps,
                                 early_stop_patience_steps=early_stop_patience_steps,
                                 is_debug=is_debug,
                                 dataset=dataset,
                                 is_train=is_train,
                                 is_fivemodels=is_fivemodels,
                                 val_size=val_size,
                                 test_size=test_size,
                                 settings=settings,
                                 now=now
                                 )

                        # Model evaluation
                        model_results3 = model_losss(df, dataset=dataset, horizon=horizon)
                        # Model report

                        reportname = f'{settings}--{now}-预测报告.pdf'  # report file name
                        reportname = reportname.replace(':', '-')  # replace colons
                        title = f'{settings}--{now}-预测报告'  # report title
                        brent_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
                                         reportname=reportname)

                        # Send the email
                        m = SendMail(
                            username=username,
                            passwd=passwd,
                            recv=recv,
                            title=title,
                            content=content,
                            file=max(glob.glob(os.path.join(dataset, reportname)), key=os.path.getctime),
                            ssl=ssl,
                        )
                        # m.send_mail()

                        # # LSTM univariate model
                        # ex_Lstm(df, input_seq_len=input_size, output_seq_len=horizon, is_debug=is_debug, dataset=dataset)

                        # # LSTM multivariate model
                        # ex_Lstm_M(df, n_days=input_size, out_days=horizon, is_debug=is_debug, datasetpath=dataset)

                        # # GRU model
                        # # ex_GRU(df)

                        # Send the email
                        # m = SendMail(
                        #     username=username,
                        #     passwd=passwd,
                        #     recv=recv,
                        #     title=title,
                        #     content=content,
                        #     file=max(glob.glob(os.path.join(dataset, '*.pdf')), key=os.path.getctime),
                        #     ssl=ssl,
                        # )
                        # m.send_mail()
164
models/grumodels.py
Normal file
@ -0,0 +1,164 @@
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

mpl.rcParams['font.family'] = 'SimHei'  # use the SimHei font so Chinese labels render

import random
import string
import time
import datetime
import base64
import re
import hmac
import hashlib
import requests
from hashlib import sha256
from hmac import HMAC
from math import sqrt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import GRU, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

# Data loading and preprocessing helpers

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    '''
    Convert a time series into a supervised-learning dataset.
    :param data: dataset (2D array, or a list for a single variable)
    :param n_in: length of the input sequence, default 1
    :param n_out: length of the output sequence, default 1
    :param dropnan: drop rows containing NaN values produced by the shifting
    :return: DataFrame whose columns are the lagged inputs followed by the forecast steps
    '''
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    # shift the data down by n_in, ..., 2, 1 rows and collect each shifted block
    # (range(n_in, 0, -1) iterates backwards with step -1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    # append the output blocks (i=0 is the current step t)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # cols now holds n_in + n_out shifted copies of the data; concatenate them column-wise
    agg = pd.concat(cols, axis=1)
    # name the columns of the combined frame
    agg.columns = names
    # drop rows containing NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

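# Illustrative usage of series_to_supervised (not part of the original file):
# build a 2-lag, 1-step supervised frame from a tiny two-variable series.
if __name__ == '__main__':
    demo = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0], [4.0, 40.0]])
    # columns: var1(t-2), var2(t-2), var1(t-1), var2(t-1), var1(t), var2(t);
    # the first two rows are dropped because the shifts leave NaN there.
    print(series_to_supervised(demo, n_in=2, n_out=1))
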
def ex_GRU(df):
    dataset = df.copy()
    dataset.set_index('ds', inplace=True)
    values = dataset.values

    # scale the features into the (0, 1) range
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)

    # data preparation
    n_days = 14  # use the past 14 days of data
    n_features = scaled.shape[1]  # number of features, taken from the actual data
    reframed = series_to_supervised(scaled, n_days, 1)

    # split into training and test sets
    values = reframed.values
    n_train_days = int(values.shape[0] * 0.8)
    train = values[:n_train_days, :]
    test = values[n_train_days:, :]

    # split inputs and outputs
    n_obs = n_days * n_features
    train_X, train_y = train[:, :n_obs], train[:, -n_features]
    test_X, test_y = test[:, :n_obs], test[:, -n_features]

    # reshape inputs to [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], n_days, n_features))
    test_X = test_X.reshape((test_X.shape[0], n_days, n_features))

    # build the GRU model
    model = Sequential()
    model.add(GRU(50, return_sequences=True, input_shape=(n_days, n_features)))
    model.add(Dropout(0.2))
    model.add(GRU(50))
    model.add(Dropout(0.2))
    model.add(Dense(1))

    # compile the model
    optimizer = Adam(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=optimizer)

    # early-stopping callback
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)

    # train the model
    history = model.fit(train_X, train_y, epochs=100, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False, callbacks=[early_stopping])

    # run predictions
    yhat = model.predict(test_X)
    test_X = test_X.reshape((test_X.shape[0], n_days * n_features))

    # invert the scaling on both the predicted and the true column
    inv_yhat = np.concatenate((yhat, test_X[:, -n_features+1:]), axis=1)
    inv_yhat = scaler.inverse_transform(inv_yhat)
    inv_yhat = inv_yhat[:, 0]

    test_y = test_y.reshape((len(test_y), 1))
    inv_y = np.concatenate((test_y, test_X[:, -n_features+1:]), axis=1)
    inv_y = scaler.inverse_transform(inv_y)
    inv_y = inv_y[:, 0]

    # compute RMSE
    rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
    print('Test RMSE: %.3f' % rmse)

    # visualize the results
    n = 150
    time_axis_data = np.array(range(n))
    time_axis_future = np.array(range(n + 7))
    inv_y = inv_y[-n:]
    inv_yhat = inv_yhat[-n-7:]

    fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [5, 4]})
    fig.set_size_inches(8, 6)

    ax[0].plot(time_axis_data, inv_y, label='历史价格')
    ax[0].plot(time_axis_future, inv_yhat, linestyle='dashed', label='预测价格')
    ax[0].set_xlabel('时间')
    ax[0].set_ylabel('价格')
    ax[0].legend()
    ax[0].set_title('布伦特_多价格预测')
    ax[0].set_ylim(min(inv_y[-n - 7:]) * 0.4, max(inv_y[-n - 7:]) * 1.6)

    ax[1].axis('off')
    table_data = [[f"Day {i + 1}", "{:.2f}".format(val)] for i, val in enumerate(inv_yhat[-7:])]
    table = ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')
    table.auto_set_font_size(True)
    filename = os.path.basename(__file__).split('.')[0]

    plt.savefig(filename + '.png')
    plt.show()
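
# Minimal usage sketch for ex_GRU (illustrative, not part of the original file).
# It assumes a DataFrame with a 'ds' datetime column plus numeric feature columns,
# the first of which is the target, as produced by the project's data pipeline.
# The plotting code requires at least 150 test rows, hence the long demo series.
if __name__ == '__main__':
    rng = pd.date_range('2021-01-01', periods=1200, freq='D')
    demo_df = pd.DataFrame({'ds': rng,
                            'y': 80 + np.random.randn(1200).cumsum(),
                            'x1': np.random.rand(1200)})
    ex_GRU(demo_df)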
255
models/lstmmodels.py
Normal file
@ -0,0 +1,255 @@
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import os
import random
import string
import time
import base64
from hashlib import sha256
from hmac import HMAC
import requests
import csv
from numpy import concatenate
from math import sqrt

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    '''
    Convert a time series into a supervised-learning dataset.
    :param data: dataset (2D array, or a list for a single variable)
    :param n_in: length of the input sequence, default 1
    :param n_out: length of the output sequence, default 1
    :param dropnan: drop rows containing NaN values produced by the shifting
    :return: DataFrame whose columns are the lagged inputs followed by the forecast steps
    '''
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    # shift the data down by n_in, ..., 2, 1 rows and collect each shifted block
    # (range(n_in, 0, -1) iterates backwards with step -1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    # append the output blocks (i=0 is the current step t)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # cols now holds n_in + n_out shifted copies of the data; concatenate them column-wise
    agg = pd.concat(cols, axis=1)
    # name the columns of the combined frame
    agg.columns = names
    # drop rows containing NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def createXY(dataset, n_past):
    '''
    Build sliding-window samples: each X is the previous n_past rows of every
    column, and each y is the current row's first column.
    '''
    dataX = []
    dataY = []
    print(dataset.shape[1])  # debug: number of features
    for i in range(n_past, len(dataset)):
        dataX.append(dataset[i - n_past:i, 0:dataset.shape[1]])
        dataY.append(dataset[i, 0])
    return np.array(dataX), np.array(dataY)

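# Shape check for createXY (illustrative, not part of the original file): 100 rows
# of 3 features with a 14-step lookback yield 86 windows of shape (14, 3).
if __name__ == '__main__':
    demo = np.random.rand(100, 3)
    X, y = createXY(demo, n_past=14)
    print(X.shape, y.shape)  # (86, 14, 3) (86,)
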
def ex_Lstm_M(df, n_days=14, out_days=7, is_debug=False, datasetpath=''):
    # dataset = pd.read_csv('brentpricepredict.csv',encoding='utf-8')
    dataset = df.copy()
    dataset.set_index('ds', inplace=True)

    values = dataset.values
    if is_debug:
        # values = values[-1000:]
        pass
    # scale the features into the (0, 1) range
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)
    # predict 7 days from 14 days of data
    n_features = dataset.shape[1]
    # build a 14 -> 7 supervised-learning dataset
    reframed = series_to_supervised(scaled, n_days, out_days)

    # split the dataset
    values = reframed.values
    # 80% of the data for training, 20% for testing
    n_train = int(len(dataset) * 0.8)
    train = values[:n_train, :]
    test = values[n_train:, :]
    # split inputs and outputs
    n_obs = n_days * n_features
    # the column n_features from the end (var1 at step t) is the target
    train_X, train_y = train[:, :n_obs], train[:, -n_features]
    test_X, test_y = test[:, :n_obs], test[:, -n_features]
    # reshape inputs to 3D [samples, timesteps, features], with timesteps = 14
    train_X = train_X.reshape((train_X.shape[0], n_days, n_features))
    test_X = test_X.reshape((test_X.shape[0], n_days, n_features))
    print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

    # design the network
    model = Sequential()
    model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')
    # fit the network
    history = model.fit(train_X, train_y, epochs=100, batch_size=72, validation_data=(test_X, test_y), verbose=2,
                        shuffle=False)

    # run predictions
    yhat = model.predict(test_X)
    # flatten the inputs back to 2D (n rows, n_days * n_features columns)
    test_X = test_X.reshape((test_X.shape[0], n_days * n_features))
    # pad the predicted column with the remaining feature columns, because the
    # inverse transform expects the full feature width
    inv_yhat = concatenate((yhat, test_X[:, -n_features+1:]), axis=1)
    # invert the scaling on the padded data
    inv_yhat = scaler.inverse_transform(inv_yhat)
    inv_yhat = inv_yhat[:, 0]
    print(inv_yhat)

    test_y = test_y.reshape((len(test_y), 1))
    # pad the true column the same way before inverting the scaling
    inv_y = concatenate((test_y, test_X[:, -n_features+1:]), axis=1)
    inv_y = scaler.inverse_transform(inv_y)
    inv_y = inv_y[:, 0]

    # compute RMSE
    rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
    print('Test RMSE: %.3f' % rmse)

    # visualize the results
    # keep the last n days of history
    n = len(inv_y) - 7
    # values to plot
    time_axis_data = np.array(range(n))
    time_axis_future = np.array(range(n + 7))
    inv_y = inv_y[-n:]
    inv_yhat = inv_yhat[-n-7:]

    # plot data and future predictions
    fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [5, 4]})
    # figure size
    fig.set_size_inches(6, 6)
    # first subplot: historical and predicted prices
    ax[0].plot(time_axis_data, inv_y, label='历史价格')
    ax[0].plot(time_axis_future, inv_yhat, linestyle='dashed', label='预测价格')
    ax[0].set_xlabel('时间')
    ax[0].set_ylabel('价格')
    ax[0].legend()
    # title
    ax[0].set_title('布伦特_多价格预测')
    # y-axis range
    ax[0].set_ylim(50, 120)

    # second subplot: table of the predicted prices
    ax[1].axis('off')
    table_data = [[f"Day {i + 1}", "{:.2f}".format(val)] for i, val in enumerate(inv_yhat[-7:])]
    table = ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')
    # center the table contents
    table.auto_set_font_size(True)
    # save the figure
    filename = os.path.basename(__file__).split('.')[0]

    plt.savefig(os.path.join(datasetpath, filename + '_M.png'))
    # plt.show()

def ex_Lstm(df, input_seq_len=50, output_seq_len=7, is_debug=False, dataset=''):

    # make sure the date column is datetime (if not already)
    df['ds'] = pd.to_datetime(df['ds'])
    # separate out the numeric columns (excluding the date column)
    numeric_df = df.select_dtypes(include=['int64', 'float64'])

    prices = df
    # Remove any NaN values
    df = df.drop('ds', axis=1)
    prices = np.array(df, dtype=float)  # convert to a NumPy array of floats
    prices = prices[~np.isnan(prices)]  # note: this also flattens the array to 1D
    if is_debug:
        prices = prices[-300:]

    # Prepare input sequences
    inputs = []
    for i in range(len(prices)-input_seq_len-output_seq_len+1):
        inputs.append(prices[i:i+input_seq_len])
    inputs = np.array(inputs)

    # Prepare output sequences
    outputs = []
    for i in range(input_seq_len, len(prices)-output_seq_len+1):
        outputs.append(prices[i:i+output_seq_len])
    outputs = np.array(outputs)

    # Split dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=0.2)

    # Normalize data
    scaler_in = MinMaxScaler()
    X_train = scaler_in.fit_transform(X_train)
    X_test = scaler_in.transform(X_test)

    scaler_out = MinMaxScaler()
    y_train = scaler_out.fit_transform(y_train)
    y_test = scaler_out.transform(y_test)

    # Define LSTM model
    model = Sequential()
    model.add(LSTM(128, activation='relu', input_shape=(input_seq_len, 1)))
    model.add(Dense(output_seq_len))
    model.compile(optimizer='adam', loss='mse')

    # Train LSTM model
    model.fit(X_train.reshape(-1, input_seq_len, 1), y_train, epochs=100, batch_size=64, validation_data=(X_test.reshape(-1, input_seq_len, 1), y_test))

    # Evaluate LSTM model
    mse = model.evaluate(X_test.reshape(-1, input_seq_len, 1), y_test)

    # Make future predictions
    future_inputs = np.array([prices[-input_seq_len:]])
    future_inputs = scaler_in.transform(future_inputs)
    future_predictions = model.predict(future_inputs.reshape(-1, input_seq_len, 1))
    future_predictions = scaler_out.inverse_transform(future_predictions)[0]

    # Print results
    print("MSE: ", mse)
    print("Future predictions: ", future_predictions)

    # Generate time axis for data and future predictions
    time_axis_data = np.arange(len(prices))
    time_axis_future = np.arange(len(prices), len(prices) + len(future_predictions))

    # Concatenate time axis and data
    time_axis = np.concatenate((time_axis_data, time_axis_future))

    # Concatenate data and future predictions
    data_and_predictions = np.concatenate((prices, future_predictions))

    # Plot data and future predictions
    fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]})

    # First subplot: Data and Future Predictions
    ax[0].plot(time_axis, data_and_predictions, label='Data and Future Predictions')
    ax[0].plot(time_axis_future, future_predictions, linestyle='dashed', label='Future Predictions')
    ax[0].set_xlabel('Time')
    ax[0].set_ylabel('Price')
    ax[0].legend()

    # Second subplot: Table for Future Predictions
    ax[1].axis('off')
    table_data = [[f"Day {i+1}", "{:.2f}".format(val)] for i, val in enumerate(future_predictions)]
    table = ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')
    plt.savefig(os.path.join(dataset, 'lstmmodels.png'))
    # plt.show()
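
# Minimal usage sketch for the two entry points above (illustrative, not part of
# the original file): both expect a DataFrame with a 'ds' date column; ex_Lstm_M
# treats the first numeric column as the target, while ex_Lstm flattens all
# numeric columns into a single series.
if __name__ == '__main__':
    rng = pd.date_range('2023-01-01', periods=300, freq='D')
    demo_df = pd.DataFrame({'ds': rng,
                            'y': 80 + np.random.randn(300).cumsum(),
                            'x1': np.random.rand(300)})
    ex_Lstm_M(demo_df.copy(), n_days=14, out_days=7, datasetpath='.')
    ex_Lstm(demo_df.copy(), input_seq_len=50, output_seq_len=7, dataset='.')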
1519
models/nerulforcastmodels.py
Normal file
File diff suppressed because it is too large
104
pushdata.py
Normal file
@ -0,0 +1,104 @@
# Load configuration
from config_jingbo import *
# from config_tansuanli import *
from lib.tools import *
from lib.dataread import *
from models.nerulforcastmodels import ex_Model, model_losss, brent_export_pdf, tansuanli_export_pdf
from models.lstmmodels import ex_Lstm_M, ex_Lstm
from models.grumodels import ex_GRU
import glob
import torch
torch.set_float32_matmul_precision("high")

if __name__ == '__main__':

    signature = BinanceAPI(APPID, SECRET)
    etadata = EtaReader(signature=signature,
                        classifylisturl=classifylisturl,
                        classifyidlisturl=classifyidlisturl,
                        edbcodedataurl=edbcodedataurl,
                        edbcodelist=edbcodelist,
                        edbdatapushurl=edbdatapushurl,
                        edbdeleteurl=edbdeleteurl,
                        edbbusinessurl=edbbusinessurl
                        )

    models = [
        'NHITS',
        'Informer',
        'LSTM',
        'iTransformer',
        'TSMixer',
        'TSMixerx',
        'PatchTST',
        'RNN',
        'GRU',
        'TCN',
        'BiTCN',
        'DilatedRNN',
        'MLP',
        'DLinear',
        'NLinear',
        'TFT',
        'FEDformer',
        'StemGNN',
        'MLPMultivariate',
        'TiDE',
        'DeepNPT']

    # ETA self-built indicator codes for each model
    modelsindex = {
        'NHITS': 'SELF0000001',
        'Informer': 'SELF0000057',
        'LSTM': 'SELF0000058',
        'iTransformer': 'SELF0000059',
        'TSMixer': 'SELF0000060',
        'TSMixerx': 'SELF0000061',
        'PatchTST': 'SELF0000062',
        'RNN': 'SELF0000063',
        'GRU': 'SELF0000064',
        'TCN': 'SELF0000065',
        'BiTCN': 'SELF0000066',
        'DilatedRNN': 'SELF0000067',
        'MLP': 'SELF0000068',
        'DLinear': 'SELF0000069',
        'NLinear': 'SELF0000070',
        'TFT': 'SELF0000071',
        'FEDformer': 'SELF0000072',
        'StemGNN': 'SELF0000073',
        'MLPMultivariate': 'SELF0000074',
        'TiDE': 'SELF0000075',
        'DeepNPT': 'SELF0000076'
    }

    # Push each model's predictions to ETA
    # df_predict = pd.read_csv('dataset/predict.csv', encoding='gbk')
    # # df_predict.rename(columns={'ds':'Date'}, inplace=True)
    # for m in modelsindex.keys():
    #     data = {}
    #     datalist = []
    #     for date, value in zip(df_predict['ds'], df_predict[m]):
    #         datalist.append({'Date': date, 'Value': value})
    #     data['DataList'] = datalist
    #     data['IndexCode'] = modelsindex[m]
    #     data['IndexName'] = f'价格预测{m}模型'
    #     data['Remark'] = m
    #     # print(data['DataList'])
    #     etadata.push_data(data)

    # Delete indicators
    # IndexCodeList = ['SELF0000055']
    # for i in range(1, 57):
    #     if i < 10: i = f'0{i}'
    #     IndexCodeList.append(f'SELF00000{i}')
    # print(IndexCodeList)
    # etadata.del_zhibiao(IndexCodeList)

    # Delete values within a date range
    indexcodelist = modelsindex.values()
    for indexcode in indexcodelist:
        data = {
            "IndexCode": indexcode,  # indicator code
            "StartDate": "2020-04-20",  # first date to delete (>=); if StartDate equals EndDate, only that date is deleted
            "EndDate": "2024-05-28"  # last date to delete (<=)
        }

        # etadata.del_business(data)
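
    # A compact helper equivalent to the commented push loop above (a sketch, not
    # part of the original file; it assumes EtaReader.push_data accepts the
    # payload format shown in this script).
    def push_model_predictions(etadata, df_predict, modelsindex):
        for m, code in modelsindex.items():
            payload = {
                'IndexCode': code,
                'IndexName': f'价格预测{m}模型',
                'Remark': m,
                'DataList': [{'Date': d, 'Value': v}
                             for d, v in zip(df_predict['ds'], df_predict[m])],
            }
            etadata.push_data(payload)

    # push_model_predictions(etadata, pd.read_csv('dataset/predict.csv', encoding='gbk'), modelsindex)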
62
八个维度demo copy.py
Normal file
@ -0,0 +1,62 @@
import logging
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS
from neuralforecast.utils import AirPassengersPanel
from mlforecast.utils import PredictionIntervals
from neuralforecast.losses.pytorch import DistributionLoss, MAE

os.environ['NIXTLA_ID_AS_COL'] = '1'

AirPassengersPanel_train = AirPassengersPanel[AirPassengersPanel['ds'] < AirPassengersPanel['ds'].values[-12]].reset_index(drop=True)
AirPassengersPanel_test = AirPassengersPanel[AirPassengersPanel['ds'] >= AirPassengersPanel['ds'].values[-12]].reset_index(drop=True)
AirPassengersPanel_test['y'] = np.nan
AirPassengersPanel_test['y_[lag12]'] = np.nan

horizon = 12
input_size = 24

prediction_intervals = PredictionIntervals()

models = [NHITS(h=horizon, input_size=input_size, max_steps=100, loss=MAE(), scaler_type="robust"),
          NHITS(h=horizon, input_size=input_size, max_steps=100, loss=DistributionLoss("Normal", level=[90]), scaler_type="robust")]
nf = NeuralForecast(models=models, freq='ME')
nf.fit(AirPassengersPanel_train, prediction_intervals=prediction_intervals)

preds = nf.predict(futr_df=AirPassengersPanel_test, level=[90])

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(20, 7))
plot_df = pd.concat([AirPassengersPanel_train, preds])

plot_df = plot_df[plot_df['unique_id']=='Airline1'].drop(['unique_id','trend','y_[lag12]'], axis=1).iloc[-50:]

ax1.plot(plot_df['ds'], plot_df['y'], c='black', label='True')
ax1.plot(plot_df['ds'], plot_df['NHITS'], c='blue', label='median')
ax1.fill_between(x=plot_df['ds'][-12:],
                 y1=plot_df['NHITS-lo-90'][-12:].values,
                 y2=plot_df['NHITS-hi-90'][-12:].values,
                 alpha=0.4, label='level 90')
ax1.set_title('AirPassengers Forecast - Uncertainty quantification using Conformal Prediction', fontsize=18)
ax1.set_ylabel('Monthly Passengers', fontsize=15)
ax1.set_xticklabels([])
ax1.legend(prop={'size': 10})
ax1.grid()

ax2.plot(plot_df['ds'], plot_df['y'], c='black', label='True')
ax2.plot(plot_df['ds'], plot_df['NHITS1'], c='blue', label='median')
ax2.fill_between(x=plot_df['ds'][-12:],
                 y1=plot_df['NHITS1-lo-90'][-12:].values,
                 y2=plot_df['NHITS1-hi-90'][-12:].values,
                 alpha=0.4, label='level 90')
ax2.set_title('AirPassengers Forecast - Uncertainty quantification using Normal distribution', fontsize=18)
ax2.set_ylabel('Monthly Passengers', fontsize=15)
ax2.set_xlabel('Timestamp [t]', fontsize=15)
ax2.legend(prop={'size': 10})
ax2.grid()
200
八个维度demo.py
Normal file
@ -0,0 +1,200 @@
import pandas as pd
from datasetsforecast.long_horizon import LongHorizon

# Change this to your own data to try the model
Y_df, _, _ = LongHorizon.load(directory='./', group='ETTm2')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

# For this exercise we are going to take 20% of the dataset
n_time = len(Y_df.ds.unique())
val_size = int(.2 * n_time)
test_size = int(.2 * n_time)

Y_df.groupby('unique_id').head(2)

import matplotlib.pyplot as plt

# We are going to plot the temperature of the transformer
# and mark the validation and train splits
u_id = 'HUFL'
x_plot = pd.to_datetime(Y_df[Y_df.unique_id==u_id].ds)
y_plot = Y_df[Y_df.unique_id==u_id].y.values

x_val = x_plot[n_time - val_size - test_size]
x_test = x_plot[n_time - test_size]

fig = plt.figure(figsize=(10, 5))
fig.tight_layout()

plt.plot(x_plot, y_plot)
plt.xlabel('Date', fontsize=17)
plt.ylabel('HUFL [15 min temperature]', fontsize=17)

plt.axvline(x_val, color='black', linestyle='-.')
plt.axvline(x_test, color='black', linestyle='-.')
plt.text(x_val, 5, ' Validation', fontsize=12)
plt.text(x_test, 5, ' Test', fontsize=12)

plt.grid()

from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from neuralforecast.auto import AutoNHITS, AutoTFT, AutoTSMixer, AutoTSMixerx
from neuralforecast.models import TSMixer, TSMixerx, MLPMultivariate, NHITS
from neuralforecast.losses.pytorch import MAE
from neuralforecast.core import NeuralForecast

horizon = 96  # 24hrs = 4 * 15 min.
input_size = 5 * horizon  # assumed: the original script used input_size without defining it; 5 * horizon matches the nhits_config choice

# Use your own config or AutoNHITS.default_config
nhits_config = {
    "learning_rate": tune.choice([1e-3]),                                      # Initial learning rate
    "max_steps": tune.choice([1000]),                                          # Number of SGD steps
    "input_size": tune.choice([5 * horizon]),                                  # input_size = multiplier * horizon
    "batch_size": tune.choice([7]),                                            # Number of series in windows
    "windows_batch_size": tune.choice([256]),                                  # Number of windows in batch
    "n_pool_kernel_size": tune.choice([[2, 2, 2], [16, 8, 1]]),                # MaxPool's kernel size
    "n_freq_downsample": tune.choice([[168, 24, 1], [24, 12, 1], [1, 1, 1]]),  # Interpolation expressivity ratios
    "activation": tune.choice(['ReLU']),                                       # Type of non-linear activation
    "n_blocks": tune.choice([[1, 1, 1]]),                                      # Blocks per each of the 3 stacks
    "mlp_units": tune.choice([[[512, 512], [512, 512], [512, 512]]]),          # 2 512-unit layers per block, for each stack
    "interpolation_mode": tune.choice(['linear']),                             # Type of multi-step interpolation
    "val_check_steps": tune.choice([100]),                                     # Compute validation every 100 steps
    "random_seed": tune.randint(1, 10),
}

tft_config = {
    "input_size": tune.choice([horizon]),
    "hidden_size": tune.choice([32]),
    "n_head": tune.choice([2]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice(['robust', 'standard']),
    "max_steps": tune.choice([500, 1000]),
    "windows_batch_size": tune.choice([32]),
    "check_val_every_n_epoch": tune.choice([100]),
    "random_seed": tune.randint(1, 20),
}

tsmixer_config = {
    "input_size": input_size,                      # Size of input window
    "max_steps": tune.choice([500, 1000, 2000]),   # Number of training iterations
    "val_check_steps": 100,                        # Compute validation every x steps
    "early_stop_patience_steps": 5,                # Early stopping steps
    "learning_rate": tune.loguniform(1e-4, 1e-2),  # Initial learning rate
    "n_block": tune.choice([1, 2, 4, 6, 8]),       # Number of mixing layers
    "dropout": tune.uniform(0.0, 0.99),            # Dropout
    "ff_dim": tune.choice([32, 64, 128]),          # Dimension of the feature linear layer
    "scaler_type": 'identity',
}

tsmixerx_config = tsmixer_config.copy()
tsmixerx_config['futr_exog_list'] = ['ex_1', 'ex_2', 'ex_3', 'ex_4']

models = [AutoNHITS(h=horizon,
                    config=nhits_config,
                    num_samples=5),
          AutoTFT(h=horizon,
                  loss=MAE(),
                  config=tft_config,
                  num_samples=3),
          TSMixer(h=horizon,
                  input_size=input_size,
                  n_series=7,
                  max_steps=1000,
                  val_check_steps=100,
                  early_stop_patience_steps=5,
                  scaler_type='identity',
                  valid_loss=MAE(),
                  random_seed=12345678,
                  ),
          TSMixerx(h=horizon,
                   input_size=input_size,
                   n_series=7,
                   max_steps=1000,
                   val_check_steps=100,
                   early_stop_patience_steps=5,
                   scaler_type='identity',
                   dropout=0.7,
                   valid_loss=MAE(),
                   random_seed=12345678,
                   futr_exog_list=['ex_1', 'ex_2', 'ex_3', 'ex_4'],
                   ),
          MLPMultivariate(h=horizon,
                          input_size=input_size,
                          n_series=7,
                          max_steps=1000,
                          val_check_steps=100,
                          early_stop_patience_steps=5,
                          scaler_type='standard',
                          hidden_size=256,
                          valid_loss=MAE(),
                          random_seed=12345678,
                          ),
          NHITS(h=horizon,
                input_size=horizon,
                max_steps=1000,
                val_check_steps=100,
                early_stop_patience_steps=5,
                scaler_type='robust',
                valid_loss=MAE(),
                random_seed=12345678,
                ),
          AutoTSMixer(h=horizon,
                      n_series=7,
                      loss=MAE(),
                      config=tsmixer_config,
                      num_samples=10,
                      search_alg=HyperOptSearch(),
                      backend='ray',
                      valid_loss=MAE()),
          AutoTSMixerx(h=horizon,
                       n_series=7,
                       loss=MAE(),
                       config=tsmixerx_config,
                       num_samples=10,
                       search_alg=HyperOptSearch(),
                       backend='ray',
                       valid_loss=MAE())]

nf = NeuralForecast(
    models=models,
    freq='15min')

Y_hat_df = nf.cross_validation(df=Y_df, val_size=val_size,
                               test_size=test_size, n_windows=None)
nf.models[0].results.get_best_result().config
y_true = Y_hat_df.y.values
y_hat = Y_hat_df['AutoNHITS'].values

n_series = len(Y_df.unique_id.unique())

y_true = y_true.reshape(n_series, -1, horizon)
y_hat = y_hat.reshape(n_series, -1, horizon)

print('Parsed results')
print('2. y_true.shape (n_series, n_windows, n_time_out):\t', y_true.shape)
print('2. y_hat.shape (n_series, n_windows, n_time_out):\t', y_hat.shape)

fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(10, 11))
fig.tight_layout()

series = ['HUFL', 'HULL', 'LUFL', 'LULL', 'MUFL', 'MULL', 'OT']
series_idx = 3

for idx, w_idx in enumerate([200, 300, 400]):
    axs[idx].plot(y_true[series_idx, w_idx, :], label='True')
    axs[idx].plot(y_hat[series_idx, w_idx, :], label='Forecast')
    axs[idx].grid()
    axs[idx].set_ylabel(series[series_idx] + f' window {w_idx}',
                        fontsize=17)
    if idx == 2:
        axs[idx].set_xlabel('Forecast Horizon', fontsize=17)
plt.legend()
plt.show()
plt.close()

from neuralforecast.losses.numpy import mae, mse

print('MAE: ', mae(y_hat, y_true))
print('MSE: ', mse(y_hat, y_true))
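
# A small follow-up sketch (not part of the original script): break the overall
# error down per series, using plain numpy on the reshaped arrays above.
import numpy as np
for s_idx, name in enumerate(series):
    per_series_mae = np.mean(np.abs(y_hat[s_idx] - y_true[s_idx]))
    print(f'{name}: MAE = {per_series_mae:.4f}')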
14
原油预测定时任务,请勿关闭.py
Normal file
@ -0,0 +1,14 @@
# Run the forecast entry point on a schedule
import time
from main import predict_main

while True:
    try:
        print(time.strftime('%H:%M'))
        # only run on weekdays, at 18:00
        if time.strftime('%A') not in ['Saturday', 'Sunday'] and time.strftime('%H:%M') in ['18:00']:
            predict_main()
        time.sleep(60)
    except Exception as e:
        # keep the loop alive, but report the failure and still wait a minute
        print(e)
        time.sleep(60)
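
# A sketch of an alternative to the loop above (not part of the original file):
# tracking the last run date makes the job fire exactly once per weekday, even if
# the loop never lands precisely on the 18:00 minute.
import time
from main import predict_main

last_run_date = None
while True:
    now_date = time.strftime('%Y-%m-%d')
    is_weekday = time.strftime('%A') not in ['Saturday', 'Sunday']
    if is_weekday and time.strftime('%H:%M') >= '18:00' and last_run_date != now_date:
        try:
            predict_main()
        except Exception as e:
            print(e)
        last_run_date = now_date  # mark today as done even on failure, to avoid retry storms
    time.sleep(60)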