workpc 2024-11-01 16:38:21 +08:00
parent 296db87abc
commit c76cebdec6
18 changed files with 5369 additions and 0 deletions

55
aa copy.py Normal file

@@ -0,0 +1,55 @@
# Count feature frequencies
# Load the data (raw string so the Windows path backslashes are not treated as escapes)
import pandas as pd
df = pd.read_csv(r"D:\code\huarongqiming\碳酸锂合并数据.csv", encoding='gbk')
df['ds'] = pd.to_datetime(df['ds'])
# Sort ascending by ds and reset the index
df = df.sort_values(by='ds', ascending=True).reset_index(drop=True)
# Count feature frequencies:
# sample 10 rows per column, measure the day gap from each sampled row to the
# following observation, and keep the most common gap as that column's frequency
columns = df.columns.to_list()
columns.remove('ds')
count_dict = {}
for column in columns:
    # Collect the timestamps where this column has data
    values = df[[column, 'ds']]
    values = values.dropna(axis=0)  # assign instead of dropna(inplace=True) on a slice
    values = values.reset_index(drop=True)
    # Sample 10 rows
    value = values.sample(10)
    index = value.index
    next_index = index + 1
    count = []
    for i, j in zip(index, next_index):
        # Day difference between consecutive rows, looked up by index
        try:
            count.append((values.loc[j, 'ds'] - values.loc[i, 'ds']).days)
        except KeyError:
            # a sampled row may be the last one, with no following row
            pass
    # Treat a 31-day gap as 30 days (monthly data)
    count = [30 if i == 31 else i for i in count]
    # Keep the most frequent gap
    count = max(set(count), key=count.count)
    # Store it
    count_dict[column] = count
df = pd.DataFrame(count_dict, index=['count']).T
pindu_dfs = pd.DataFrame()
# Group the columns by gap and print the frequency summary
pindudict = {'1': '日度', '7': '周度', '30': '月度', '90': '季度', '180': '半年度', '365': '年度'}
for i in df.groupby('count'):
    # The group's index values are the column names
    index = i[1].index
    pindu_df = pd.DataFrame()
    pindu_df[pindudict[str(i[0])] + f'({len(i[1])})'] = index
    # Append to pindu_dfs
    pindu_dfs = pd.concat([pindu_dfs, pindu_df], axis=1)
# Replace NaN with ''
pindu_dfs = pindu_dfs.fillna('')
pindu_dfs.to_csv(r'D:\code\huarongqiming\pindu.csv', index=False)
print(pindu_dfs)
print('*' * 200)

10
aa.py Normal file

@@ -0,0 +1,10 @@
# Run a command on a schedule
import os
import time
while True:
    print(time.strftime('%H:%M'))
    # Run only on weekdays, and only at 07:00
    if time.strftime('%A') not in ['Saturday', 'Sunday'] and time.strftime('%H:%M') in ['07:00']:
        os.system('D:/ProgramData/anaconda3/python.exe main.py')
    time.sleep(60)

254
config_jingbo.py Normal file

@@ -0,0 +1,254 @@
import logging
import os
import logging.handlers
import datetime
# eta API token
APPID = "XNLDvxZHHugj7wJ7"
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"
# eta API endpoints
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
edbdatapushurl = 'http://10.189.2.78:8108/v1/edb/push'
edbdeleteurl = 'http://10.189.2.78:8108/v1/edb/business/edb/del'
edbbusinessurl = 'http://10.189.2.78:8108/v1/edb/business/data/del'
edbcodelist = ['CO1 Comdty', 'ovx index', 'C2404194834', 'C2404199738', 'dxy curncy', 'C2403128043', 'C2403150124',
'DOESCRUD Index', 'WTRBM1 EEGC Index', 'FVHCM1 INDEX', 'doedtprd index', 'CFFDQMMN INDEX',
'C2403083739', 'C2404167878', 'C2403250571', 'lmcads03 lme comdty', 'GC1 COMB Comdty',
'C2404171822','C2404167855']
# Temporarily hard-coded column names matching the edb codes above; to be revised later
edbnamelist = [
'ds','y',
'Brent c1-c6','Brent c1-c3','Brent-WTI','美国商业原油库存',
'DFL','美国汽油裂解价差','ovx index','dxy curncy','lmcads03 lme comdty',
'C2403128043','C2403150124','FVHCM1 INDEX','doedtprd index','CFFDQMMN INDEX',
'C2403083739','C2404167878',
'GC1 COMB Comdty','C2404167855'
]
# Index codes for eta self-hosted indicators
modelsindex = {
'NHITS': 'SELF0000001',
'Informer':'SELF0000057',
'LSTM':'SELF0000058',
'iTransformer':'SELF0000059',
'TSMixer':'SELF0000060',
'TSMixerx':'SELF0000061',
'PatchTST':'SELF0000062',
'RNN':'SELF0000063',
'GRU':'SELF0000064',
'TCN':'SELF0000065',
'BiTCN':'SELF0000066',
'DilatedRNN':'SELF0000067',
'MLP':'SELF0000068',
'DLinear':'SELF0000069',
'NLinear':'SELF0000070',
'TFT':'SELF0000071',
'FEDformer':'SELF0000072',
'StemGNN':'SELF0000073',
'MLPMultivariate':'SELF0000074',
'TiDE':'SELF0000075',
'DeepNPTS':'SELF0000076'
}
# Request body template for pushing prediction results to eta; the model and
# DataList fields are filled in right before each request is sent
data = {
    "IndexCode": "",
    "IndexName": "价格预测模型",
    "Unit": "",
    "Frequency": "日度",
    "SourceName": "价格预测",
    "Remark": 'ddd',
    "DataList": [
        {
            "Date": "2024-05-02",
            "Value": 333444
        }
    ]
}
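# Illustrative sketch (an assumption, mirroring the commented example in
# pushdata.py): before each push the template above is copied and the
# model-specific fields are filled in; 'df_predict' and the names below are
# supplied by the calling script. This helper is not called anywhere yet.
def fill_push_payload(payload, model_name, index_code, df_predict):
    payload = dict(payload)  # shallow copy so the template stays clean
    payload['DataList'] = [{'Date': d, 'Value': v} for d, v in zip(df_predict['ds'], df_predict[model_name])]
    payload['IndexCode'] = index_code
    payload['IndexName'] = f'价格预测{model_name}模型'
    payload['Remark'] = model_name
    return payload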
# eta classification
# Only level-3 classes return data, so every level-3 class under the
# energy & chemicals branch was collected manually:
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
# ParentId: 1160  -> 能源化工 (energy & chemicals)
# ClassifyId: 1214 -> 原油 (crude oil)
# ParentId: 1214  -> everything under crude oil
ClassifyId = 1214
### Report upload settings
# Production environment (disabled)
# login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login"
# upload_url = "http://10.200.32.39/jingbo-api/api/analysis/reportInfo/researchUploadReportSave"
# login_data = {
#     "data": {
#         "account": "api_dev",
#         "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=",
#         "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
#         "terminal": "API"
#     },
#     "funcModule": "API",
#     "funcOperation": "获取token"
# }
# upload_data = {
#     "funcModule": '研究报告信息',
#     "funcOperation": '上传原油价格预测报告',
#     "data": {
#         "ownerAccount": '27663',  # owner account, 27663 - 刘小朋
#         "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed
#         "fileName": '',  # file name
#         "fileBase64": '',  # file content, base64-encoded
#         "categoryNo": 'yyjgycbg',  # research report category code
#         "smartBusinessClassCode": 'YCJGYCBG',  # analysis report category code
#         "reportEmployeeCode": "E40482",  # reporter, E40482 - 管理员, 0000027663 - 刘小朋
#         "reportDeptCode": "002000621000",  # department, 002000621000 - SH期货研究部
#         "productGroupCode": "RAW_MATERIAL"  # product group
#     }
# }
# Test environment (currently active)
login_pushreport_url = "http://192.168.100.53:8080/jingbo-dev/api/server/login"
upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
# upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave"  # zhaoqiwei
login_data = {
    "data": {
        "account": "api_test",
        "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=",
        "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
        "terminal": "API"
    },
    "funcModule": "API",
    "funcOperation": "获取token"
}
upload_data = {
    "funcModule": '研究报告信息',
    "funcOperation": '上传原油价格预测报告',
    "data": {
        "ownerAccount": 'arui',  # owner account
        "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed
        "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf',  # file name
        "fileBase64": '',  # file content, base64-encoded
        "categoryNo": 'yyjgycbg',  # research report category code
        "smartBusinessClassCode": 'YCJGYCBG',  # analysis report category code
        "reportEmployeeCode": "E40116",  # reporter
        "reportDeptCode": "D0044",  # department
        "productGroupCode": "RAW_MATERIAL"  # product group
    }
}
### Switches -- production values (disabled)
# is_train = True            # train models
# is_debug = False           # debug mode
# is_eta = True              # pull data from the eta API
# is_timefurture = True      # add time features
# is_fivemodels = False      # reuse the five best saved models
# is_edbcode = False         # use features from the edbcodelist
# is_edbnamelist = False     # use the custom edbnamelist columns
# is_update_eta = True       # push predictions to eta
# is_update_report = True    # upload the report
### Switches -- current values
is_train = True              # train models
is_debug = False             # debug mode
is_eta = True                # pull data from the eta API
is_timefurture = True        # add time features
is_fivemodels = False        # reuse the five best saved models
is_edbcode = False           # use features from the edbcodelist
is_edbnamelist = False       # use the custom edbnamelist columns
is_update_eta = False        # push predictions to eta
is_update_report = False     # upload the report
# Data cutoff date
end_time = ''                # cutoff date for the data
delweekenday = True          # drop weekend rows
is_corr = False              # apply lead/lag shifts to improve feature correlations
add_kdj = False              # add KDJ indicators
if add_kdj and is_edbnamelist:
    edbnamelist = edbnamelist + ['K', 'D', 'J']
### Model parameters
y = 'Brent活跃合约'  # target column of the crude-oil dataset
# y = '期货结算价(连续):布伦特原油:前一个观测值'  # target column for the INE oil dataset
horizon = 5              # forecast horizon
input_size = 40          # input sequence length
train_steps = 50 if is_debug else 1000  # training steps (caps the epochs)
val_check_steps = 30     # validation frequency
early_stop_patience_steps = 5  # early-stopping patience
# --- cross-validation parameters
test_size = 200          # test-set size (reassigned later in the code)
val_size = test_size     # validation-set size, same as the test set
### Feature-selection parameters
k = 100                  # number of features to keep; 0 or more than the feature count means all
### Files
data_set = '原油指标数据.xlsx'  # dataset file
# data_set = 'INE_OIL(1).csv'
### Folders
dataset = 'dataset'      # dataset folder
# Database name
db_name = os.path.join(dataset, 'jbsh_yuanyou.db')
settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'
# Current timestamp
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
reportname = f'Brent原油大模型预测--{now}.pdf'  # report file name
reportname = reportname.replace(':', '-')  # colons are not allowed in file names
### Email settings
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
# recv = ['liurui_test@163.com', '52585119@qq.com']
recv = ['liurui_test@163.com']
title = reportname  # the variable, not the literal string 'reportname'
content = 'brent价格预测报告请看附件'
file = os.path.join(dataset, reportname)
# file = os.path.join(dataset, '14-7-50--100-原油指标数据.xlsx-Brent连1合约价格--20240731175936-预测报告.pdf')
ssl = True
### Logging
# Create the log directory if it does not exist
log_dir = 'logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
# Configure the logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.INFO)
# File handler: write logs to a rotating file
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
# Console handler: print logs to the console
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(message)s'))
# Attach both handlers
logger.addHandler(file_handler)
logger.addHandler(console_handler)
# logger.info('当前配置:' + settings)

297
config_juxiting.py Normal file

@@ -0,0 +1,297 @@
import logging
import os
import logging.handlers
import datetime
# eta API token
APPID = "XNLDvxZHHugj7wJ7"
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"
# eta API endpoints
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
edbdatapushurl = 'http://10.189.2.78:8108/v1/edb/push'
edbdeleteurl = 'http://10.189.2.78:8108/v1/edb/business/edb/del'
edbbusinessurl = 'http://10.189.2.78:8108/v1/edb/business/data/del'
edbcodelist = ['ID01385938','lmcads03 lme comdty',
'GC1 COMB Comdty',
'C2404171822',
'dxy curncy',
'S5443199 ',
'S5479800',
'S5443108',
'H7358586',
'LC3FM1 INDEX',
'CNY REGN Curncy',
's0105897',
'M0067419',
'M0066351',
'S0266372',
'S0266438',
'S0266506']
# Temporarily hard-coded column names matching the edb codes above; to be revised later
edbnamelist = [
'ds','y',
'LME铜价',
'黄金连1合约',
'Brent-WTI',
'美元指数',
'甲醇鲁南价格',
'甲醇太仓港口价格',
'山东丙烯主流价',
'丙烷(山东)',
'FEI丙烷 M1',
'在岸人民币汇率',
'南华工业品指数',
'PVC期货主力',
'PE期货收盘价',
'PP连续-1月',
'PP连续-5月',
'PP连续-9月',
]
edbcodenamedict = {
'ID01385938':'PP拉丝1102K市场价青州国家能源宁煤',
'lmcads03 lme comdty':'LME铜价',
'GC1 COMB Comdty':'黄金连1合约',
'C2404171822':'Brent-WTI',
'dxy curncy':'美元指数',
'S5443199 ':'甲醇鲁南价格',
'S5479800':'甲醇太仓港口价格',
'S5443108':'山东丙烯主流价',
'H7358586':'丙烷(山东)',
'LC3FM1 INDEX':'FEI丙烷 M1',
'CNY REGN Curncy':'在岸人民币汇率',
's0105897':'南华工业品指数',
'M0067419':'PVC期货主力',
'M0066351':'PE期货收盘价',
'S0266372':'PP连续-1月',
'S0266438':'PP连续-5月',
'S0266506':'PP连续-9月',
}
# Index codes for eta self-hosted indicators
modelsindex = {
'NHITS': 'SELF0000001',
'Informer':'SELF0000057',
'LSTM':'SELF0000058',
'iTransformer':'SELF0000059',
'TSMixer':'SELF0000060',
'TSMixerx':'SELF0000061',
'PatchTST':'SELF0000062',
'RNN':'SELF0000063',
'GRU':'SELF0000064',
'TCN':'SELF0000065',
'BiTCN':'SELF0000066',
'DilatedRNN':'SELF0000067',
'MLP':'SELF0000068',
'DLinear':'SELF0000069',
'NLinear':'SELF0000070',
'TFT':'SELF0000071',
'FEDformer':'SELF0000072',
'StemGNN':'SELF0000073',
'MLPMultivariate':'SELF0000074',
'TiDE':'SELF0000075',
'DeepNPTS':'SELF0000076'
}
# Request body template for pushing prediction results to eta; the model and
# DataList fields are filled in right before each request is sent
data = {
    "IndexCode": "",
    "IndexName": "价格预测模型",
    "Unit": "",
    "Frequency": "日度",
    "SourceName": "价格预测",
    "Remark": 'ddd',
    "DataList": [
        {
            "Date": "2024-05-02",
            "Value": 333444
        }
    ]
}
# eta classification
# Only level-3 classes return data, so every level-3 class under the
# energy & chemicals branch was collected manually:
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
# ParentId: 1160  -> 能源化工 (energy & chemicals)
# ClassifyId: 1214 -> 原油 (crude oil); 1161 -> PP
# ParentId: 1214  -> everything under crude oil
ClassifyId = 1161
### Report upload settings
# Production environment (currently active)
login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login"
upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList"
login_data = {
    "data": {
        "account": "api_dev",
        "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=",
        "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
        "terminal": "API"
    },
    "funcModule": "API",
    "funcOperation": "获取token"
}
upload_data = {
    "funcModule": '研究报告信息',
    "funcOperation": '上传原油价格预测报告',
    "data": {
        "ownerAccount": '27663',  # owner account, 27663 - 刘小朋
        "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed
        "fileName": '',  # file name
        "fileBase64": '',  # file content, base64-encoded
        "categoryNo": 'yyjgycbg',  # research report category code
        "smartBusinessClassCode": 'YCJGYCBG',  # analysis report category code
        "reportEmployeeCode": "E40482",  # reporter, E40482 - 管理员, 0000027663 - 刘小朋
        "reportDeptCode": "002000621000",  # department, 002000621000 - SH期货研究部
        "productGroupCode": "RAW_MATERIAL"  # product group
    }
}
# # Test environment (disabled)
# login_pushreport_url = "http://192.168.100.53:8080/jingbo-dev/api/server/login"
# upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
# # upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave"  # zhaoqiwei
# login_data = {
#     "data": {
#         "account": "api_test",
#         "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=",
#         "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
#         "terminal": "API"
#     },
#     "funcModule": "API",
#     "funcOperation": "获取token"
# }
# upload_data = {
#     "funcModule": '研究报告信息',
#     "funcOperation": '上传原油价格预测报告',
#     "data": {
#         "ownerAccount": 'arui',  # owner account
#         "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed
#         "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf',  # file name
#         "fileBase64": '',  # file content, base64-encoded
#         "categoryNo": 'yyjgycbg',  # research report category code
#         "smartBusinessClassCode": 'YCJGYCBG',  # analysis report category code
#         "reportEmployeeCode": "E40116",  # reporter
#         "reportDeptCode": "D0044",  # department
#         "productGroupCode": "RAW_MATERIAL"  # product group
#     }
# }
### Switches -- production values (disabled)
# is_train = True            # train models
# is_debug = False           # debug mode
# is_eta = True              # pull data from the eta API
# is_timefurture = True      # add time features
# is_fivemodels = False      # reuse the five best saved models
# is_edbcode = False         # use features from the edbcodelist
# is_edbnamelist = False     # use the custom edbnamelist columns
# is_update_report = True    # upload the report
### Switches -- current values
is_train = True              # train models
is_debug = False             # debug mode
is_eta = True                # pull data from the eta API
is_timefurture = True        # add time features
is_fivemodels = False        # reuse the five best saved models
is_edbcode = False           # use features from the edbcodelist
is_edbnamelist = False       # use the custom edbnamelist columns
is_update_eta = False        # push predictions to eta
is_update_report = False     # upload the report
# Data cutoff date
end_time = ''                # cutoff date for the data
delweekenday = True          # drop weekend rows
is_corr = False              # apply lead/lag shifts to improve feature correlations
add_kdj = False              # add KDJ indicators
if add_kdj and is_edbnamelist:
    edbnamelist = edbnamelist + ['K', 'D', 'J']
### Model parameters
y = 'PP拉丝1102K市场价青州国家能源宁煤'  # target column of the PP dataset
# y = '期货结算价(连续):布伦特原油:前一个观测值'  # target column for the INE oil dataset
horizon = 5              # forecast horizon
input_size = 40          # input sequence length
train_steps = 50 if is_debug else 1000  # training steps (caps the epochs)
val_check_steps = 30     # validation frequency
early_stop_patience_steps = 5  # early-stopping patience
# --- cross-validation parameters
test_size = 200          # test-set size (reassigned later in the code)
val_size = test_size     # validation-set size, same as the test set
### Feature-selection parameters
k = 100                  # number of features to keep; 0 or more than the feature count means all
### Files
data_set = 'PP指标数据.xlsx'  # dataset file
# data_set = 'INE_OIL(1).csv'
### Folders
dataset = 'dataset'      # dataset folder
# Database name
db_name = os.path.join(dataset, 'jbsh_juxiting.db')
settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'
# Current timestamp
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
reportname = f'PP--{now}-预测报告.pdf'  # report file name
reportname = reportname.replace(':', '-')  # colons are not allowed in file names
### Email settings
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
# recv = ['liurui_test@163.com', '52585119@qq.com']
recv = ['liurui_test@163.com']
title = reportname
content = y + '预测报告请看附件'
file = os.path.join(dataset, reportname)
# file = os.path.join(dataset, '14-7-50--100-原油指标数据.xlsx-Brent连1合约价格--20240731175936-预测报告.pdf')
ssl = True
### Logging
# Create the log directory if it does not exist
log_dir = 'logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
# Configure the logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.INFO)
# File handler: write logs to a rotating file
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
# Console handler: print logs to the console
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(message)s'))
# Attach both handlers
logger.addHandler(file_handler)
logger.addHandler(console_handler)
# logger.info('当前配置:' + settings)

106
config_tansuanli.py Normal file

@@ -0,0 +1,106 @@
import logging
import os
import logging.handlers
# eta API token
APPID = "XNLDvxZHHugj7wJ7"
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"
# eta API endpoints
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
edbcodelist = ['CO1 Comdty', 'ovx index', 'C2404194834', 'C2404199738', 'dxy curncy', 'C2403128043', 'C2403150124',
'DOESCRUD Index', 'WTRBM1 EEGC Index', 'FVHCM1 INDEX', 'doedtprd index', 'CFFDQMMN INDEX',
'C2403083739', 'C2404167878', 'C2403250571', 'ovx index', 'lmcads03 lme comdty', 'GC1 COMB Comdty',
'C2404171822']
# Temporarily hard-coded column names matching the edb codes above; to be revised later
edbnamelist = [
'ds','y',
'Brent c1-c6','Brent c1-c3','Brent-WTI','美国商业原油库存',
'DFL','美国汽油裂解价差','ovx index','dxy curncy','lmcads03 lme comdty',
'C2403128043','C2403150124','FVHCM1 INDEX','doedtprd index','CFFDQMMN INDEX',
'C2403083739','C2404167878',
# 'ovx index',
'GC1 COMB Comdty'
]
# eta classification
# Only level-3 classes return data, so every level-3 class under the
# energy & chemicals branch was collected manually:
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
# ParentId: 1160  -> 能源化工 (energy & chemicals)
# ClassifyId: 1214 -> 原油 (crude oil)
# ParentId: 1214  -> everything under crude oil
ClassifyId = 1214
### Switches
is_train = True          # train models
is_debug = True          # debug mode
is_eta = False           # pull data from the eta API
is_timefurture = False   # add time features
is_fivemodels = False    # reuse the five best saved models
is_edbcode = False       # use features from the edbcodelist
is_edbnamelist = False   # use the custom edbnamelist columns; only relevant for the crude-oil dataset
# Data cutoff date
end_time = '2024-07-30'  # cutoff date for the data
delweekenday = True      # drop weekend rows
### Model parameters
y = '电碳价格'  # target column of the dataset
horizon = 5              # forecast horizon
input_size = 10          # input sequence length
train_steps = 10 if is_debug else 1000  # training steps (caps the epochs)
val_check_steps = 30     # validation frequency
early_stop_patience_steps = 5  # early-stopping patience
### --- cross-validation parameters
test_size = 100          # test-set size (reassigned later in the code)
val_size = test_size     # validation-set size, same as the test set
### --- feature-selection parameters
k = 100                  # number of features to keep; 0 or more than the feature count means all
### --- files
data_set = '碳酸锂合并数据.csv'  # dataset file
### --- folders
dataset = 'dataset'      # dataset folder
settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'
import datetime
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')  # current timestamp
reportname = f'{settings}--{now}-预测报告.pdf'  # report file name
reportname = reportname.replace(':', '-')  # colons are not allowed in file names
### Email settings
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
# recv = ['liurui_test@163.com', '52585119@qq.com']
recv = ['liurui_test@163.com']
title = reportname  # the variable, not the literal string 'reportname'
content = 'brent价格预测报告请看附件'
file = os.path.join(dataset, reportname)
# file = os.path.join(dataset, '14-7-50--100-原油指标数据.xlsx-Brent连1合约价格--20240731175936-预测报告.pdf')
ssl = True
### --- Logging
# Create the log directory if it does not exist
log_dir = 'logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
# Configure the logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.INFO)
# File handler: write logs to a rotating file
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
# Console handler: print logs to the console
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(message)s'))
# Attach both handlers
logger.addHandler(file_handler)
logger.addHandler(console_handler)
logger.info('当前配置:' + settings)

0
lib/__init__.py Normal file

1391
lib/dataread.py Normal file

File diff suppressed because it is too large

191
lib/duojinchengpredict.py Normal file

@@ -0,0 +1,191 @@
import re
import os
import time
import joblib
import multiprocessing
import pandas as pd
import torch
# Helper functions
def loadcsv(filename):
    try:
        df = pd.read_csv(filename, encoding='utf-8')
    except UnicodeDecodeError:
        df = pd.read_csv(filename, encoding='gbk')
    return df
def datachuli(df, datecol='date'):
    # Drop columns that are entirely empty
    df = df.dropna(axis=1, how='all')
    # Forward fill, then backward fill, missing values
    df = df.ffill()
    df = df.bfill()
    # Rename the date column to 'ds'
    df.rename(columns={datecol: 'ds'}, inplace=True)
    # Parse 'ds' as datetime
    df['ds'] = pd.to_datetime(df['ds'])
    # Rename the target column
    df.rename(columns={'Brent连1合约价格': 'y'}, inplace=True)
    return df
def getdata(filename, datecol='date'):
    df = loadcsv(filename)
    df = datachuli(df, datecol)
    return df
# Prediction helpers
def predict(X_test, nf, result_list):
    df_predict = nf.predict(X_test).reset_index()
    result_list.append(df_predict.values.tolist())
    return df_predict
def testSetPredict(X_test, nf, columns, dataset):
    # Record the start time
    start_time = time.time()
    # Work out how many samples each process handles
    num_samples = len(X_test)
    num_processes = multiprocessing.cpu_count()
    samples_per_process = num_samples // num_processes
    manager = multiprocessing.Manager()
    result_list = manager.list()  # shared list collecting the results
    # Process pool
    with multiprocessing.Pool(num_processes) as pool:
        processes = []
        for i in range(num_processes):
            # Index range this process is responsible for
            start_index = i * samples_per_process
            end_index = (i + 1) * samples_per_process if i != num_processes - 1 else num_samples
            # Slice the data accordingly
            X_test_split = X_test[start_index:end_index]
            # Submit the tasks
            for X in X_test_split:
                processes.append(pool.apply_async(predict, args=(X, nf, result_list)))
        for process in processes:
            process.get()
    # Convert the shared list back into DataFrames
    df_combined = pd.DataFrame()
    df_combined2 = pd.DataFrame()
    for result in result_list:
        try:
            df_shared = pd.DataFrame(result, columns=['index', 'ds'] + columns)
            df_combined = pd.concat([df_combined, df_shared]).reset_index(drop=True)
        except ValueError:
            # Shape mismatch: these are DeepAR rows, which carry the extra
            # quantile columns listed in the module-level columns2 below
            df_shared2 = pd.DataFrame(result, columns=['index', 'ds'] + columns2)
            df_combined2 = pd.concat([df_combined2, df_shared2]).reset_index(drop=True)
    # df_combined.drop(['index'], axis=1, inplace=True)
    df_combined.to_csv(os.path.join(dataset, 'df_combined.csv'), index=False)
    # df_combined2.drop(['index'], axis=1, inplace=True)
    df_combined2.to_csv('df_combined.csv', index=False)
    end_time = time.time()
    # Print the elapsed time in seconds
    print("运行时间:", end_time - start_time, "")
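# Minimal sketch (illustrative only) of the Pool + Manager-list pattern that
# testSetPredict uses above: worker processes append results to a shared list.
def _demo_square(x, out):
    out.append(x * x)
    return x * x
def _demo_pool_pattern():
    with multiprocessing.Manager() as manager:
        out = manager.list()
        with multiprocessing.Pool(2) as pool:
            tasks = [pool.apply_async(_demo_square, args=(x, out)) for x in range(4)]
            for t in tasks:
                t.get()
        print(sorted(out))  # prints [0, 1, 4, 9]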
if __name__ == '__main__':
    # Record the start time
    start_time = time.time()
    # file = '指标数据处理.csv'
    file = 'brentpricepredict.csv'
    df = getdata(file)
    df.head()
    # Features and label
    X = df.drop(['y', 'ds'], axis=1)  # feature set, excluding the timestamp and the label (Brent连1合约价格)
    y = df['y']  # labels
    # Index where the first 80% of the data ends
    split_index = int(0.8 * df.shape[0])
    # Chronological train/test split
    df_train = df[:split_index]
    df_test = df[split_index:]
    df_train['unique_id'] = 1
    df_test['unique_id'] = 1
    df_combined = pd.DataFrame()
    df_test = df_test.reindex()
    # df_test = df_test[-20:]
    # Model list, used to name the prediction columns
columns = [
'NHITS',
'Informer',
'LSTM',
'iTransformer',
'TSMixer',
'TSMixerx',
'PatchTST',
'RNN',
'GRU',
'TCN',
'DeepAR',
'BiTCN',
'DilatedRNN',
'MLP',
'DLinear',
'NLinear',
'TFT',
'FEDformer',
'StemGNN',
'MLPMultivariate',
'TiDE',
'DeepNPTS',
]
    # DeepAR predictions carry five extra quantile columns, so they need their own column list
columns2 = [
'NHITS',
'Informer',
'LSTM',
'iTransformer',
'TSMixer',
'TSMixerx',
'PatchTST',
'RNN',
'GRU',
'TCN',
'DeepAR',
'DeepAR-median',
'DeepAR-lo-90',
'DeepAR-lo-80',
'DeepAR-hi-80',
'DeepAR-hi-90',
'BiTCN',
'DilatedRNN',
'MLP',
'DLinear',
'NLinear',
'TFT',
'FEDformer',
'StemGNN',
'MLPMultivariate',
'TiDE',
        'DeepNPTS',
]
    input_size = 14
    X_test = []
    for i in range(0, len(df_test) - input_size + 1):
        X_test.append(df_test.iloc[i:i + input_size])
    nf = joblib.load('model_reg.joblib')
    # testSetPredict requires an output folder; 'dataset' matches the folder
    # the rest of the repo writes to
    testSetPredict(X_test, nf, columns, dataset='dataset')

448
lib/tools.py Normal file

@@ -0,0 +1,448 @@
import time
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from config_jingbo import logger
from sklearn import metrics
import random, string, base64, hmac, hashlib
from scipy.stats import spearmanr  # used by plot_corr below
from reportlab.pdfbase import pdfmetrics  # font registration
from reportlab.pdfbase.ttfonts import TTFont  # TrueType font class
from reportlab.platypus import Table, SimpleDocTemplate, Paragraph, Image  # report building blocks
from reportlab.lib.pagesizes import letter  # page size (8.5*inch x 11*inch)
from reportlab.lib.styles import getSampleStyleSheet  # text styles
from reportlab.lib import colors  # color module
from reportlab.graphics.charts.barcharts import VerticalBarChart  # chart class
from reportlab.graphics.charts.legends import Legend  # legend class
from reportlab.graphics.shapes import Drawing  # drawing canvas
from reportlab.lib.units import cm  # cm unit
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import sqlite3
import tkinter as tk
from tkinter import messagebox
def timeit(func):
    '''Timing decorator: logs the wrapped function's execution time.'''
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        execution_time = end_time - start_time
        logger.info(f"{func.__name__} 函数的执行时间为: {execution_time}")
        return result
    return wrapper
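# Illustrative usage sketch (defined here but never called): decorating a
# function logs its runtime through the logger configured in the config module.
@timeit
def _demo_timed_sleep():
    time.sleep(0.1)
# _demo_timed_sleep()  # would log the execution time of _demo_timed_sleep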
class BinanceAPI:
    '''
    Builds the API request-header signature (Binance-style HMAC scheme).
    '''
    def __init__(self, APPID, SECRET):
        self.APPID = APPID
        self.SECRET = SECRET
        self.get_signature()
    # Generate a random string to use as the nonce
    def generate_nonce(self, length=32):
        self.nonce = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
        return self.nonce
    # Current timestamp in seconds
    def get_timestamp(self):
        return int(time.time())
    # Build the string to sign
    def build_sign_str(self):
        return f'appid={self.APPID}&nonce={self.nonce}&timestamp={self.timestamp}'
    # Compute the signature with HMAC SHA-256
    def calculate_signature(self, secret, message):
        return base64.urlsafe_b64encode(hmac.new(secret.encode('utf-8'), message.encode('utf-8'), hashlib.sha256).digest()).decode('utf-8')
    def get_signature(self):
        # Generate the nonce, timestamp and signature in one go
        self.nonce = self.generate_nonce()
        self.timestamp = self.get_timestamp()
        self.sign_str = self.build_sign_str()
        self.signature = self.calculate_signature(self.SECRET, self.sign_str)
        # return self.signature
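# Illustrative usage sketch: nonce, timestamp and signature are computed in
# __init__ and read off the instance; how they are mapped onto HTTP headers is
# handled by EtaReader in lib/dataread.py and is not shown here.
def _demo_signature():
    api = BinanceAPI('demo-appid', 'demo-secret')  # placeholder credentials
    print(api.nonce, api.timestamp, api.signature)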
class Graphs:
    '''
    Helpers for building the PDF report.
    '''
    # Title
    @staticmethod
    def draw_title(title: str):
        # Get the sample stylesheet
        style = getSampleStyleSheet()
        # Heading style
        ct = style['Heading1']
        # Tweak individual attributes
        ct.fontName = 'SimSun'       # font name
        ct.fontSize = 18             # font size
        ct.leading = 50              # line spacing
        ct.textColor = colors.green  # font color
        ct.alignment = 1             # centered
        ct.bold = True
        # Wrap the title in a Paragraph and return it
        return Paragraph(title, ct)
    # Subtitle
    @staticmethod
    def draw_little_title(title: str):
        style = getSampleStyleSheet()
        ct = style['Normal']
        ct.fontName = 'SimSun'     # font name
        ct.fontSize = 15           # font size
        ct.leading = 30            # line spacing
        ct.textColor = colors.red  # font color
        return Paragraph(title, ct)
    # Body paragraph
    @staticmethod
    def draw_text(text: str):
        style = getSampleStyleSheet()
        ct = style['Normal']
        ct.fontName = 'SimSun'
        ct.fontSize = 12
        ct.wordWrap = 'CJK'      # automatic line wrapping for CJK text
        ct.alignment = 0         # left-aligned
        ct.firstLineIndent = 32  # first-line indent
        ct.leading = 25
        return Paragraph(text, ct)
    # Table
    @staticmethod
    def draw_table(col_width, *args):
        style = [
            ('FONTNAME', (0, 0), (-1, -1), 'SimSun'),      # font
            ('FONTSIZE', (0, 0), (-1, 0), 10),             # header-row font size
            ('FONTSIZE', (0, 1), (-1, -1), 8),             # body font size
            ('BACKGROUND', (0, 0), (-1, 0), '#d5dae6'),    # header background
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),         # center horizontally
            ('ALIGN', (0, 1), (-1, -1), 'LEFT'),           # body rows left-aligned
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),        # center vertically
            ('TEXTCOLOR', (0, 0), (-1, -1), colors.darkslategray),  # text color
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),  # grey grid lines, 0.5pt
            # ('SPAN', (0, 1), (0, 2)),  # merge rows 2-3 of the first column
            # ('SPAN', (0, 3), (0, 4)),
            # ('SPAN', (0, 5), (0, 6)),
            # ('SPAN', (0, 7), (0, 8)),
        ]
        table = Table(args, colWidths=col_width, style=style)
        return table
    # Bar chart
    @staticmethod
    def draw_bar(bar_data: list, ax: list, items: list):
        drawing = Drawing(500, 250)
        bc = VerticalBarChart()
        bc.x = 45        # chart x position
        bc.y = 45        # chart y position
        bc.height = 200  # chart height
        bc.width = 350   # chart width
        bc.data = bar_data
        bc.strokeColor = colors.black  # top/right axis color
        bc.valueAxis.valueMin = 5000   # y-axis minimum
        bc.valueAxis.valueMax = 26000  # y-axis maximum
        bc.valueAxis.valueStep = 2000  # y-axis step
        bc.categoryAxis.labels.dx = 2
        bc.categoryAxis.labels.dy = -8
        bc.categoryAxis.labels.angle = 20
        bc.categoryAxis.categoryNames = ax
        # Legend
        leg = Legend()
        leg.fontName = 'SimSun'
        leg.alignment = 'right'
        leg.boxAnchor = 'ne'
        leg.x = 475  # legend x position
        leg.y = 240
        leg.dxTextSpace = 10
        leg.columnMaximum = 3
        leg.colorNamePairs = items
        drawing.add(leg)
        drawing.add(bc)
        return drawing
    # Image
    @staticmethod
    def draw_img(path):
        img = Image(path)       # load the image at the given path
        img.drawWidth = 20*cm   # image width
        img.drawHeight = 10*cm  # image height
        return img
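# Illustrative sketch (an assumption, not how the report module itself builds
# its PDFs): assembling a one-page PDF from the helpers above. 'SimSun.ttf' is
# a placeholder path; the SimSun font must be registered before the styles use it.
def _demo_pdf(fontpath='SimSun.ttf'):
    pdfmetrics.registerFont(TTFont('SimSun', fontpath))
    story = [
        Graphs.draw_title('价格预测报告'),
        Graphs.draw_text('正文内容'),
        Graphs.draw_table([80, 80], ('ds', 'y'), ('2024-05-02', '83.5')),
    ]
    SimpleDocTemplate('demo.pdf', pagesize=letter).build(story)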
# The evaluation metrics live in different libraries, so the formulas used are
# collected here in one place.
# MSE
def mse(y_true, y_pred):
    res_mse = metrics.mean_squared_error(y_true, y_pred)
    return res_mse
# RMSE
def rmse(y_true, y_pred):
    res_rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
    return res_rmse
# MAE
def mae(y_true, y_pred):
    res_mae = metrics.mean_absolute_error(y_true, y_pred)
    return res_mae
# sklearn has no MAPE/SMAPE here, so they are implemented from the formulas
# MAPE
def mape(y_true, y_pred):
    res_mape = np.mean(np.abs((y_pred - y_true) / y_true)) * 100
    return res_mape
# SMAPE
def smape(y_true, y_pred):
    res_smape = 2.0 * np.mean(np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true))) * 100
    return res_smape
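# Quick sanity check of the metric helpers on a toy pair of series.
def _demo_metrics():
    y_true = np.array([100.0, 102.0, 101.0])
    y_pred = np.array([99.0, 103.0, 101.5])
    print(mse(y_true, y_pred), rmse(y_true, y_pred), mae(y_true, y_pred))
    print(mape(y_true, y_pred), smape(y_true, y_pred))  # both in percent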
# Correlation plots
def plot_corr(data, size=11):
    # Drop the ds column
    data.drop(columns=['ds'], inplace=True)
    # Empty DataFrame to collect the correlations
    correlation_df = pd.DataFrame(columns=['Feature', 'Correlation'])
    # Pearson and Spearman correlation of every feature against the target
    for col in data.columns:
        if col != 'y':
            pearson_correlation = np.corrcoef(data[col], data['y'])[0, 1]
            spearman_correlation, _ = spearmanr(data[col], data['y'])
            new_row = {'Feature': col, 'Pearson_Correlation': round(pearson_correlation, 3), 'Spearman_Correlation': round(spearman_correlation, 2)}
            correlation_df = pd.concat([correlation_df, pd.DataFrame([new_row])], ignore_index=True)
    # Drop the empty helper column
    correlation_df.drop('Correlation', axis=1, inplace=True)
    correlation_df.dropna(inplace=True)
    correlation_df.to_csv('指标相关性分析.csv', index=False)
    data = correlation_df['Pearson_Correlation'].values  # ndarray, so the comparisons below broadcast
    # 20 bins between -1 and 1
    bins = np.linspace(-1, 1, 21)
    # Count how many correlations fall into each bin
    hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]
    # Figure size
    plt.figure(figsize=(10, 6))
    # Histogram
    plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))
    # Title and axis labels
    plt.title('皮尔逊相关系数分布图')
    plt.xlabel('区间')
    plt.ylabel('统计数')
    plt.savefig('皮尔逊相关性系数.png')
    plt.close()
    # Same histogram for the Spearman correlations
    plt.figure(figsize=(10, 6))
    data = correlation_df['Spearman_Correlation'].values
    hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]
    plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))
    plt.title('斯皮尔曼相关系数分布图')
    plt.xlabel('区间')
    plt.ylabel('统计数')
    plt.savefig('斯皮尔曼相关性系数.png')
    plt.close()
# Email helper
class SendMail(object):
    def __init__(self, username, passwd, recv, title, content,
                 file=None, ssl=False,
                 email_host='smtp.qq.com', port=25, ssl_port=465):
        '''
        :param username: account name
        :param passwd: password
        :param recv: recipients; pass a list for several, e.g. ['a@qq.com', 'b@qq.com']
        :param title: subject line
        :param content: message body
        :param file: attachment path; use an absolute path if it is not in the working directory; no attachment by default
        :param ssl: use an SSL connection; plain connection by default
        :param email_host: SMTP server address (QQ mail by default)
        :param port: port for plain connections, 25 by default
        :param ssl_port: port for SSL connections, 465 by default
        '''
        self.username = username  # account name
        self.passwd = passwd      # password
        self.recv = recv          # recipients; a list for several addresses
        self.title = title        # subject
        self.content = content    # body
        self.file = file          # attachment path, absolute if not in the working directory
        self.email_host = email_host  # SMTP server address
        self.port = port          # plain port
        self.ssl = ssl            # whether to use SSL
        self.ssl_port = ssl_port  # SSL port
    def send_mail(self):
        msg = MIMEMultipart()
        # Attachment handling
        if self.file:
            file_name = os.path.split(self.file)[-1]  # file name only, no path
            try:
                f = open(self.file, 'rb').read()
            except Exception as e:
                raise Exception('附件打不开!!!!')
            else:
                att = MIMEText(f, "base64", "utf-8")
                att["Content-Type"] = 'application/octet-stream'
                # Encode the file name so non-ASCII names survive transport
                new_file_name = '=?utf-8?b?' + base64.b64encode(file_name.encode()).decode() + '?='
                att["Content-Disposition"] = 'attachment; filename="%s"' % (new_file_name)
                msg.attach(att)
        msg.attach(MIMEText(self.content))  # message body
        msg['Subject'] = self.title         # subject
        msg['From'] = self.username         # sender account
        msg['To'] = ','.join(self.recv)     # recipient list
        if self.ssl:
            self.smtp = smtplib.SMTP_SSL(self.email_host, port=self.ssl_port)
        else:
            self.smtp = smtplib.SMTP(self.email_host, port=self.port)
        # Log in to the mail server and send
        self.smtp.login(self.username, self.passwd)
        try:
            self.smtp.sendmail(self.username, self.recv, msg.as_string())
        except Exception as e:
            print('出错了。。', e)
            logger.info(f'邮件服务出错了。。{e}')  # logger.info takes a single message string
        else:
            print('发送成功!')
            logger.info('邮件发送成功!')
        self.smtp.quit()
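# Illustrative usage sketch (not executed; the account and recipient values are
# placeholders, not real credentials):
def _demo_sendmail():
    m = SendMail(username='sender@example.com', passwd='app-password',
                 recv=['someone@example.com'], title='demo report',
                 content='see attachment', file=None, ssl=True)
    m.send_mail()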
def dateConvert(df, datecol='ds'):
    # Parse the date column as datetime, trying both common formats
    try:
        df[datecol] = pd.to_datetime(df[datecol], format=r'%Y-%m-%d')
    except ValueError:
        df[datecol] = pd.to_datetime(df[datecol], format=r'%Y/%m/%d')
    return df
class SQLiteHandler:
def __init__(self, db_name):
self.db_name = db_name
self.connection = None
self.cursor = None
def connect(self):
self.connection = sqlite3.connect(self.db_name)
self.cursor = self.connection.cursor()
def close(self):
if self.connection:
self.connection.close()
self.connection = None
self.cursor = None
def execute_query(self, query, params=None):
if params:
return self.cursor.execute(query, params)
else:
return self.cursor.execute(query)
def commit(self):
self.connection.commit()
def create_table(self, table_name, columns):
query = f"CREATE TABLE IF NOT EXISTS {table_name} ({columns})"
self.execute_query(query)
self.commit()
def insert_data(self, table_name, values, columns=None):
if columns:
placeholders = ', '.join(['?'] * len(values))
query = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
else:
placeholders = ', '.join(['?'] * len(values))
query = f"INSERT INTO {table_name} VALUES ({placeholders})"
self.execute_query(query, values)
self.commit()
def select_data(self, table_name, columns=None, where_condition=None, order_by=None, limit=None):
query = f"SELECT {', '.join(columns) if columns else '*'} FROM {table_name}"
if where_condition:
query += f" WHERE {where_condition}"
if order_by:
query += f" ORDER BY {order_by}"
if limit:
query += f" LIMIT {limit}"
results = self.execute_query(query).fetchall()
if results:
headers = [description[0] for description in self.execute_query(query).description]
return pd.DataFrame(results, columns=headers)
else:
return pd.DataFrame()
def update_data(self, table_name, set_values, where_condition):
query = f"UPDATE {table_name} SET {set_values} WHERE {where_condition}"
logger.info('更新数据sql'+ query)
self.execute_query(query)
self.commit()
def delete_data(self, table_name, where_condition):
query = f"DELETE FROM {table_name} WHERE {where_condition}"
self.execute_query(query)
self.commit()
def check_table_exists(self, table_name):
query = f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}'"
result = self.execute_query(query).fetchone()
return result is not None
    def add_column_if_not_exists(self, table_name, column_name, column_type):
        # Read the table schema
        query = f"PRAGMA table_info({table_name})"
        self.execute_query(query)
        columns = [column[1] for column in self.cursor.fetchall()]
        # Add the column only if it is missing
        if column_name not in columns:
            query = f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
            self.execute_query(query)
            self.commit()
            print(f"Column '{column_name}' added to table '{table_name}' successfully.")
        else:
            print(f"Column '{column_name}' already exists in table '{table_name}'.")
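# Illustrative usage sketch (not called anywhere): typical SQLiteHandler usage
# against a throwaway database; the table and column names here are made up.
def _demo_sqlitehandler():
    db = SQLiteHandler('demo.db')
    db.connect()
    db.create_table('prices', 'ds TEXT, y REAL')
    db.insert_data('prices', ('2024-05-02', 83.5), columns=('ds', 'y'))
    print(db.select_data('prices', columns=['ds', 'y'], limit='5'))
    db.close()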
if __name__ == '__main__':
print('This is a tool, not a script.')

176
main.py Normal file

@@ -0,0 +1,176 @@
# Load the configuration
# from config_jingbo import *
# from config_tansuanli import *
from config_juxiting import *
from lib.dataread import *
from lib.tools import *
from models.nerulforcastmodels import ex_Model,model_losss,brent_export_pdf,tansuanli_export_pdf,pp_export_pdf
import glob
import torch
torch.set_float32_matmul_precision("high")
sqlitedb = SQLiteHandler(db_name)
sqlitedb.connect()
def predict_main():
signature = BinanceAPI(APPID, SECRET)
etadata = EtaReader(signature=signature,
classifylisturl = classifylisturl,
classifyidlisturl=classifyidlisturl,
edbcodedataurl=edbcodedataurl,
edbcodelist=edbcodelist,
edbdatapushurl=edbdatapushurl,
edbdeleteurl=edbdeleteurl,
edbbusinessurl=edbbusinessurl
)
    # Fetch the data
    if is_eta:
        # Pull from the eta API
logger.info('从eta获取数据...')
signature = BinanceAPI(APPID, SECRET)
etadata = EtaReader(signature=signature,
classifylisturl = classifylisturl,
classifyidlisturl=classifyidlisturl,
edbcodedataurl=edbcodedataurl,
edbcodelist=edbcodelist,
edbdatapushurl=edbdatapushurl,
edbdeleteurl=edbdeleteurl,
edbbusinessurl=edbbusinessurl,
)
        # df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set,dataset=dataset)  # raw, unprocessed data
        df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set,dataset=dataset)  # raw, unprocessed data
        # Preprocess the data
        df = datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,y = y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time)
    else:
        logger.info('读取本地数据:'+os.path.join(dataset,data_set))
        df = getdata(filename=os.path.join(dataset,data_set),y=y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time)  # raw, unprocessed data
    # Rename the target column to 'y'
df.rename(columns={y:'y'},inplace=True)
if is_edbnamelist:
df = df[edbnamelist]
df.to_csv(os.path.join(dataset,'指标数据.csv'), index=False)
    # Persist the most recent true y value to the database
    # (take the last row of the frame)
    first_row = df[['ds','y']].tail(1)
    # Insert the latest true value, or update the row if its date already exists
if not sqlitedb.check_table_exists('trueandpredict'):
first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
else:
for row in first_row.itertuples(index=False):
row_dict = row._asdict()
check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
if len(check_query) > 0:
set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
sqlitedb.update_data('trueandpredict',set_clause,where_condition = f"ds = '{row.ds}'")
continue
sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=row_dict.keys())
    import datetime
    # Weekly refresh-day check: weekday() == 3 is Thursday (Monday is 0),
    # despite the variable name
    is_weekday = datetime.datetime.now().weekday() == 3
    if is_weekday:
        logger.info('更新预测模型')
        # Find the model with the lowest prediction error over the last 20 days
model_results = sqlitedb.select_data('trueandpredict',order_by = "ds DESC",limit = "20")
model_results = model_results.dropna()
modelnames = model_results.columns.to_list()[2:]
for col in model_results[modelnames].select_dtypes(include=['object']).columns:
model_results[col] = model_results[col].astype(np.float32)
        # Deviation rate of each model's prediction against the true value
for model in modelnames:
model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
        # Smallest deviation rate in each row
min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
        # Column name holding that smallest deviation rate
min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
        # Strip the suffix to recover the model name
min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
        # The model that is most often the best
most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
logger.info(f"最近20天预测残差最低的模型名称{most_common_model}")
        # Persist the result to the database
if not sqlitedb.check_table_exists('most_model'):
sqlitedb.create_table('most_model',columns="ds datetime, most_common_model TEXT")
sqlitedb.insert_data('most_model',(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),most_common_model,),columns=('ds','most_common_model',))
if is_corr:
df = corr_feature(df=df)
    df1 = df.copy()  # keep a copy; ds and y are added back after feature selection
logger.info(f"开始训练模型...")
row,col = df.shape
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
# ex_Model(df,
# horizon=horizon,
# input_size=input_size,
# train_steps=train_steps,
# val_check_steps=val_check_steps,
# early_stop_patience_steps=early_stop_patience_steps,
# is_debug=is_debug,
# dataset=dataset,
# is_train=is_train,
# is_fivemodels=is_fivemodels,
# val_size=val_size,
# test_size=test_size,
# settings=settings,
# now=now,
# etadata = etadata,
# modelsindex = modelsindex,
# data = data,
# is_eta=is_eta,
# )
    # Model evaluation
    model_results3 = model_losss(sqlitedb)
    # Model report
    title = f'{settings}--{now}-预测报告'  # report title
    brent_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
                     reportname=reportname, sqlitedb=sqlitedb)
    # pp_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
    #               reportname=reportname)
    logger.info('模型训练完成')
    # tansuanli_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, end_time=end_time, reportname=reportname)
    # # Single-variable LSTM model
    # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
    # # Multivariate LSTM model
    # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset)
    # # GRU model
    # # ex_GRU(df)
    # Email the report
m = SendMail(
username=username,
passwd=passwd,
recv=recv,
title=title,
content=content,
file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
ssl=ssl,
)
# m.send_mail()
if __name__ == '__main__':
predict_main()

123
maincanshu.py Normal file

@@ -0,0 +1,123 @@
# Load the configuration
from config_jingbo import *
from lib.tools import *
from lib.dataread import *
from models.nerulforcastmodels import ex_Model,model_losss,brent_export_pdf
from models.lstmmodels import ex_Lstm_M,ex_Lstm
from models.grumodels import ex_GRU
import glob
import torch
torch.set_float32_matmul_precision("high")
if __name__ == '__main__':
    signature = BinanceAPI(APPID, SECRET)
    # Grid of parameters to sweep over
    input_size_list = [14]
    horizon_list = [7]
    train_steps_list = [500, 1000, 1500, 2000]
    k_list = [10, 18, 25, 50, 100]
    end_time_list = ['2024-07-03']
    is_debug = False
    is_fivemodels = False  # reuse the five best saved models
    delweekenday = True
    # Iterate over every combination of the parameters above
    for i in range(len(input_size_list)):
        for h in range(len(horizon_list)):
            for j in range(len(train_steps_list)):
                for k in range(len(k_list)):
                    for end_time in end_time_list:
                        input_size = input_size_list[i]
                        horizon = horizon_list[h]  # consumed by ex_Model below
                        train_steps = train_steps_list[j]
                        K = k_list[k]
                        settings = f'{input_size}-{horizon_list[h]}-{train_steps}-{K}-{data_set}-{end_time}-{y}'
                        logger.info(f'当前配置:{settings}')
                        # Fetch the data
                        if is_eta:
                            etadata = EtaReader(signature=signature,
                                                classifylisturl=classifylisturl,
                                                classifyidlisturl=classifyidlisturl,
                                                edbcodedataurl=edbcodedataurl,
                                                edbcodelist=edbcodelist
                                                )
                            df = etadata.get_eta_api_data(data_set=data_set, dataset=dataset)  # raw, unprocessed data
                        else:
                            filename = os.path.join(dataset, data_set)
                            logger.info(f'未启用Eta数据,将读取本地数据{filename}')
                            df = pd.read_excel(filename, sheet_name='指标数据')
                        # Preprocess the data
                        df = datachuli(df=df, dataset=dataset, end_time=end_time, y=y, delweekenday=delweekenday)
                        if is_timefurture:
                            df = addtimecharacteristics(df=df, dataset=dataset)
                        # Rename the target column to 'y'
                        df.rename(columns={y: 'y'}, inplace=True)
                        logger.info(f"开始训练模型...")
                        row, col = df.shape
                        logger.info(f'当前配置:{settings}')
                        # Current timestamp, %Y-%m-%d-%H-%M-%S format
                        from datetime import datetime
                        now = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
ex_Model(df,
horizon=horizon,
input_size=input_size,
train_steps=train_steps,
val_check_steps=val_check_steps,
early_stop_patience_steps=early_stop_patience_steps,
is_debug=is_debug,
dataset=dataset,
is_train=is_train,
is_fivemodels=is_fivemodels,
val_size=val_size,
test_size=test_size,
settings=settings,
now=now
)
                        # Model evaluation
                        model_results3 = model_losss(df, dataset=dataset, horizon=horizon)
                        # Model report
                        reportname = f'{settings}--{now}-预测报告.pdf'  # report file name
                        reportname = reportname.replace(':', '-')  # colons are not allowed in file names
                        title = f'{settings}--{now}-预测报告'  # report title
                        brent_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
                                         reportname=reportname)
                        # Email the report
                        m = SendMail(
                            username=username,
                            passwd=passwd,
                            recv=recv,
                            title=title,
                            content=content,
                            file=max(glob.glob(os.path.join(dataset, reportname)), key=os.path.getctime),
                            ssl=ssl,
                        )
                        # m.send_mail()
                        # # Single-variable LSTM model
                        # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
                        # # Multivariate LSTM model
                        # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset)
                        # # GRU model
                        # # ex_GRU(df)
                        # Alternative: attach the newest PDF in the dataset folder
                        # m = SendMail(
                        #     username=username,
                        #     passwd=passwd,
                        #     recv=recv,
                        #     title=title,
                        #     content=content,
                        #     file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
                        #     ssl=ssl,
                        # )
                        # m.send_mail()

164
models/grumodels.py Normal file

@@ -0,0 +1,164 @@
import os
import re
import datetime
import random
import string
import time
import base64
import hmac
import hashlib
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['font.family'] = 'SimHei'  # font that can render the Chinese labels
from hashlib import sha256
from hmac import HMAC
from math import sqrt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import GRU, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    '''
    Convert a time series into a supervised-learning dataset.
    :param data: dataset
    :param n_in: input sequence length, 1 by default
    :param n_out: output sequence length, 1 by default
    :param dropnan: drop rows containing NaN
    :return: the reframed DataFrame
    '''
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1):
    # shift the data down by n_in, ..., 2, 1 rows and collect each shift in cols
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n):
    # shift the data up by 0, ..., n_out-1 rows and collect each shift
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # concatenate the shifted blocks side by side
    agg = pd.concat(cols, axis=1)
    # name the columns
    agg.columns = names
    # drop rows containing NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
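# Illustrative sketch: reframing a 2-variable series with a 2-step input window
# yields var*(t-2), var*(t-1) and var*(t) columns, one row per usable sample.
def _demo_series_to_supervised():
    raw = np.arange(10).reshape(5, 2)  # 5 time steps, 2 variables
    reframed = series_to_supervised(raw, n_in=2, n_out=1)
    print(reframed.columns.tolist())   # ['var1(t-2)', 'var2(t-2)', ..., 'var2(t)']
    print(reframed.shape)              # (3, 6): the first two rows contain NaN and are dropped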
def ex_GRU(df):
    dataset = df.copy()
    dataset.set_index('ds', inplace=True)
    values = dataset.values
    # Scale the features into [0, 1]
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)
    # Data preparation
    n_days = 14  # use the past 14 days
    n_features = scaled.shape[1]  # number of features, taken from the data
    reframed = series_to_supervised(scaled, n_days, 1)
    # Train/test split
    values = reframed.values
    n_train_days = int(values.shape[0] * 0.8)
    train = values[:n_train_days, :]
    test = values[n_train_days:, :]
    # Inputs and outputs
    n_obs = n_days * n_features
    train_X, train_y = train[:, :n_obs], train[:, -n_features]
    test_X, test_y = test[:, :n_obs], test[:, -n_features]
    # Reshape the inputs to [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], n_days, n_features))
    test_X = test_X.reshape((test_X.shape[0], n_days, n_features))
    # Build the GRU model
    model = Sequential()
    model.add(GRU(50, return_sequences=True, input_shape=(n_days, n_features)))
    model.add(Dropout(0.2))
    model.add(GRU(50))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    # Compile the model
    optimizer = Adam(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    # Callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)
    # Train
    history = model.fit(train_X, train_y, epochs=100, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False, callbacks=[early_stopping])
    # Predict
    yhat = model.predict(test_X)
    test_X = test_X.reshape((test_X.shape[0], n_days * n_features))
    # Invert the scaling on the predictions and the ground truth
    inv_yhat = np.concatenate((yhat, test_X[:, -n_features+1:]), axis=1)
    inv_yhat = scaler.inverse_transform(inv_yhat)
    inv_yhat = inv_yhat[:, 0]
    test_y = test_y.reshape((len(test_y), 1))
    inv_y = np.concatenate((test_y, test_X[:, -n_features+1:]), axis=1)
    inv_y = scaler.inverse_transform(inv_y)
    inv_y = inv_y[:, 0]
    # RMSE
    rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
    print('Test RMSE: %.3f' % rmse)
    # Plot the results
    n = 150
    time_axis_data = np.array(range(n))
    time_axis_future = np.array(range(n + 7))
    inv_y = inv_y[-n:]
    inv_yhat = inv_yhat[-n-7:]
    fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [5, 4]})
    fig.set_size_inches(8, 6)
    ax[0].plot(time_axis_data, inv_y, label='历史价格')
    ax[0].plot(time_axis_future, inv_yhat, linestyle='dashed', label='预测价格')
    ax[0].set_xlabel('时间')
    ax[0].set_ylabel('价格')
    ax[0].legend()
    ax[0].set_title('布伦特_多价格预测')
    ax[0].set_ylim(min(inv_y[-n - 7:]) * 0.4, max(inv_y[-n - 7:]) * 1.6)
    ax[1].axis('off')
    table_data = [[f"Day {i + 1}", "{:.2f}".format(val)] for i, val in enumerate(inv_yhat[-7:])]
    table = ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')
    table.auto_set_font_size(True)
    filename = os.path.basename(__file__).split('.')[0]
    plt.savefig(filename + '.png')
    plt.show()

255
models/lstmmodels.py Normal file

@@ -0,0 +1,255 @@
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import os
import random
import string
import time
import base64
from hashlib import sha256
from hmac import HMAC
import requests
import csv
from numpy import concatenate
from math import sqrt
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    '''
    Convert a time series into a supervised-learning dataset.
    :param data: dataset
    :param n_in: input sequence length, 1 by default
    :param n_out: output sequence length, 1 by default
    :param dropnan: drop rows containing NaN
    :return: the reframed DataFrame
    '''
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1):
    # shift the data down by n_in, ..., 2, 1 rows and collect each shift in cols
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n):
    # shift the data up by 0, ..., n_out-1 rows and collect each shift
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # concatenate the shifted blocks side by side
    agg = pd.concat(cols, axis=1)
    # name the columns
    agg.columns = names
    # drop rows containing NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
def createXY(dataset,n_past):
dataX = []
dataY = []
print(dataset.shape[1])
for i in range(n_past, len(dataset)):
dataX.append(dataset[i - n_past:i, 0:dataset.shape[1]])
dataY.append(dataset[i,0])
return np.array(dataX),np.array(dataY)
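# Illustrative sketch: createXY slides a window of n_past rows over the array;
# X collects each full window, Y the first column of the row that follows it.
def _demo_createXY():
    data = np.arange(20, dtype=float).reshape(10, 2)  # 10 steps, 2 features
    X, Y = createXY(data, n_past=3)
    print(X.shape, Y.shape)  # (7, 3, 2) (7,)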
def ex_Lstm_M(df, n_days=14, out_days=7, is_debug=False, datasetpath=''):
    # dataset = pd.read_csv('brentpricepredict.csv', encoding='utf-8')
    dataset = df.copy()
    dataset.set_index('ds', inplace=True)
    values = dataset.values
    if is_debug:
        # values = values[-1000:]
        pass
    # Scale the features into [0, 1]
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)
    # Use 14 days of data to predict 7 days
    n_features = dataset.shape[1]
    # Reframe as a 14 -> 7 supervised-learning problem
    reframed = series_to_supervised(scaled, n_days, out_days)
    # Split the dataset
    values = reframed.values
    # 80% for training, 20% for testing
    n_train = int(len(dataset) * 0.8)
    train = values[:n_train, :]
    test = values[n_train:, :]
    # Split inputs and outputs
    n_obs = n_days * n_features
    # The column n_features from the end is y at time t
    train_X, train_y = train[:, :n_obs], train[:, -n_features]
    test_X, test_y = test[:, :n_obs], test[:, -n_features]
    # Reshape the inputs to 3D [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], n_days, n_features))
    test_X = test_X.reshape((test_X.shape[0], n_days, n_features))
    print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
    # Build the network
    model = Sequential()
    model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')
    # Fit the network
    history = model.fit(train_X, train_y, epochs=100, batch_size=72, validation_data=(test_X, test_y), verbose=2,
                        shuffle=False)
    # Predict
    yhat = model.predict(test_X)
    # Flatten the inputs back to 2D
    test_X = test_X.reshape((test_X.shape[0], n_days * n_features))
    # Concatenate the predictions with the remaining feature columns so the
    # shape matches what the scaler expects, then invert the scaling
    inv_yhat = concatenate((yhat, test_X[:, -n_features+1:]), axis=1)
    inv_yhat = scaler.inverse_transform(inv_yhat)
    inv_yhat = inv_yhat[:, 0]
    print(inv_yhat)
    test_y = test_y.reshape((len(test_y), 1))
    # Same treatment for the ground truth
    inv_y = concatenate((test_y, test_X[:, -n_features+1:]), axis=1)
    inv_y = scaler.inverse_transform(inv_y)
    inv_y = inv_y[:, 0]
    # RMSE
    rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
    print('Test RMSE: %.3f' % rmse)
    # Plot the results
    # Keep n days of history
    n = len(inv_y) - 7
    # Values to plot
    time_axis_data = np.array(range(n))
    time_axis_future = np.array(range(n + 7))
    inv_y = inv_y[-n:]
    inv_yhat = inv_yhat[-n-7:]
    # Plot data and future predictions
    fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [5, 4]})
    # Figure size
    fig.set_size_inches(6, 6)
    # First subplot: history and predictions
    ax[0].plot(time_axis_data, inv_y, label='历史价格')
    ax[0].plot(time_axis_future, inv_yhat, linestyle='dashed', label='预测价格')
    ax[0].set_xlabel('时间')
    ax[0].set_ylabel('价格')
    ax[0].legend()
    # Title
    ax[0].set_title('布伦特_多价格预测')
    # y-axis range
    ax[0].set_ylim(50, 120)
    # Second subplot: table of predicted prices
    ax[1].axis('off')
    table_data = [[f"Day {i + 1}", "{:.2f}".format(val)] for i, val in enumerate(inv_yhat[-7:])]
    table = ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')
    # Center the table contents
    table.auto_set_font_size(True)
    # Save the figure
    filename = os.path.basename(__file__).split('.')[0]
    plt.savefig(os.path.join(datasetpath, filename + '_M.png'))
    # plt.show()
def ex_Lstm(df, input_seq_len=50, output_seq_len=7, is_debug=False, dataset=''):
    # Parse the date column as datetime (if not already)
    df['ds'] = pd.to_datetime(df['ds'])
    # Split out the numeric columns (everything except the date)
    numeric_df = df.select_dtypes(include=['int64', 'float64'])
    prices = df
    # print(data1)
    # Remove any NaN values
    df = df.drop('ds', axis=1)
    prices = np.array(df, dtype=float)  # convert to NumPy array of floats
    prices = prices[~np.isnan(prices)]
if is_debug:
prices = prices[-300:]
# Prepare input sequences
inputs = []
for i in range(len(prices)-input_seq_len-output_seq_len+1):
inputs.append(prices[i:i+input_seq_len])
inputs = np.array(inputs)
# Prepare output sequences
outputs = []
for i in range(input_seq_len, len(prices)-output_seq_len+1):
outputs.append(prices[i:i+output_seq_len])
outputs = np.array(outputs)
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=0.2)
# Normalize data
scaler_in = MinMaxScaler()
X_train = scaler_in.fit_transform(X_train)
X_test = scaler_in.transform(X_test)
scaler_out = MinMaxScaler()
y_train = scaler_out.fit_transform(y_train)
y_test = scaler_out.transform(y_test)
# Define LSTM model
model = Sequential()
model.add(LSTM(128, activation='relu', input_shape=(input_seq_len, 1)))
model.add(Dense(output_seq_len))
model.compile(optimizer='adam', loss='mse')
# Train LSTM model
model.fit(X_train.reshape(-1, input_seq_len, 1), y_train, epochs=100, batch_size=64, validation_data=(X_test.reshape(-1, input_seq_len, 1), y_test))
# Evaluate LSTM model
mse = model.evaluate(X_test.reshape(-1, input_seq_len, 1), y_test)
# Make future predictions
future_inputs = np.array([prices[-input_seq_len:]])
future_inputs = scaler_in.transform(future_inputs)
future_predictions = model.predict(future_inputs.reshape(-1, input_seq_len, 1))
future_predictions = scaler_out.inverse_transform(future_predictions)[0]
# Print results
print("MSE: ", mse)
print("Future predictions: ", future_predictions)
# Generate time axis for data and future predictions
time_axis_data = np.arange(len(prices))
time_axis_future = np.arange(len(prices), len(prices) + len(future_predictions))
# Concatenate time axis and data
time_axis = np.concatenate((time_axis_data, time_axis_future))
# Concatenate data and future predictions
data_and_predictions = np.concatenate((prices, future_predictions))
# Plot data and future predictions
fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]})
# First subplot: Data and Future Predictions
ax[0].plot(time_axis, data_and_predictions, label='Data and Future Predictions')
ax[0].plot(time_axis_future, future_predictions, linestyle='dashed', label='Future Predictions')
ax[0].set_xlabel('Time')
ax[0].set_ylabel('Price')
ax[0].legend()
# Second subplot: Table for Future Predictions
ax[1].axis('off')
table_data = [[f"Day {i+1}", "{:.2f}".format(val)] for i, val in enumerate(future_predictions)]
table = ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')
plt.savefig(os.path.join(dataset,'lstmmodels.png'))
# plt.show()

1519
models/nerulforcastmodels.py Normal file

File diff suppressed because it is too large

104
pushdata.py Normal file

@@ -0,0 +1,104 @@
# Load the configuration
from config_jingbo import *
# from config_tansuanli import *
from lib.tools import *
from lib.dataread import *
from models.nerulforcastmodels import ex_Model,model_losss,brent_export_pdf,tansuanli_export_pdf
from models.lstmmodels import ex_Lstm_M,ex_Lstm
from models.grumodels import ex_GRU
import glob
import torch
torch.set_float32_matmul_precision("high")
if __name__ == '__main__':
signature = BinanceAPI(APPID, SECRET)
etadata = EtaReader(signature=signature,
classifylisturl = classifylisturl,
classifyidlisturl=classifyidlisturl,
edbcodedataurl=edbcodedataurl,
edbcodelist=edbcodelist,
edbdatapushurl = edbdatapushurl,
edbdeleteurl = edbdeleteurl,
edbbusinessurl = edbbusinessurl
)
models = [
'NHITS',
'Informer',
'LSTM',
'iTransformer',
'TSMixer',
'TSMixerx',
'PatchTST',
'RNN',
'GRU',
'TCN',
'BiTCN',
'DilatedRNN',
'MLP',
'DLinear',
'NLinear',
'TFT',
'FEDformer',
'StemGNN',
'MLPMultivariate',
'TiDE',
'DeepNPT']
# eta自由数据指标编码
modelsindex = {
'NHITS': 'SELF0000001',
'Informer':'SELF0000057',
'LSTM':'SELF0000058',
'iTransformer':'SELF0000059',
'TSMixer':'SELF0000060',
'TSMixerx':'SELF0000061',
'PatchTST':'SELF0000062',
'RNN':'SELF0000063',
'GRU':'SELF0000064',
'TCN':'SELF0000065',
'BiTCN':'SELF0000066',
'DilatedRNN':'SELF0000067',
'MLP':'SELF0000068',
'DLinear':'SELF0000069',
'NLinear':'SELF0000070',
'TFT':'SELF0000071',
'FEDformer':'SELF0000072',
'StemGNN':'SELF0000073',
'MLPMultivariate':'SELF0000074',
'TiDE':'SELF0000075',
'DeepNPT':'SELF0000076'
}
# df_predict = pd.read_csv('dataset/predict.csv',encoding='gbk')
# # df_predict.rename(columns={'ds':'Date'},inplace=True)
# for m in modelsindex.keys():
# list = []
# for date,value in zip(df_predict['ds'],df_predict[m]):
# list.append({'Date':date,'Value':value})
# data['DataList'] = list
# data['IndexCode'] = modelsindex[m]
# data['IndexName'] = f'价格预测{m}模型'
# data['Remark'] = m
# # print(data['DataList'])
# etadata.push_data(data)
# 删除指标
# IndexCodeList = ['SELF0000055']
# for i in range(1,57):
# if i < 10 : i = f'0{i}'
# IndexCodeList.append(f'SELF00000{i}')
# print(IndexCodeList)
# etadata.del_zhibiao(IndexCodeList)
# 删除特定日期的值
indexcodelist = modelsindex.values()
for indexcode in indexcodelist:
data = {
"IndexCode": indexcode, #指标编码
"StartDate": "2020-04-20", #指标需要删除的开始日期(>=),如果开始日期和结束日期相等,那么就是删除该日期
"EndDate": "2024-05-28" #指标需要删除的结束日期(<=),如果开始日期和结束日期相等,那么就是删除该日期
}
# etadata.del_business(data)
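
For reference, a minimal sketch of how one model's forecasts could be pushed through etadata.push_data, in the payload shape the commented block above assumes ('dataset/predict.csv' is a hypothetical export with a 'ds' column plus one column per model):

import pandas as pd
df_predict = pd.read_csv('dataset/predict.csv', encoding='gbk')  # hypothetical export
m = 'NHITS'
data = {
    'IndexCode': modelsindex[m],  # indicator code registered for this model
    'IndexName': f'价格预测{m}模型',
    'Remark': m,
    'DataList': [{'Date': d, 'Value': v} for d, v in zip(df_predict['ds'], df_predict[m])],
}
# etadata.push_data(data)  # left commented, matching the cautious convention above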

62
八个维度demo copy.py Normal file
View File

@ -0,0 +1,62 @@
import logging
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS
from neuralforecast.utils import AirPassengersPanel
from mlforecast.utils import PredictionIntervals
from neuralforecast.losses.pytorch import DistributionLoss, MAE
os.environ['NIXTLA_ID_AS_COL'] = '1'
AirPassengersPanel_train = AirPassengersPanel[AirPassengersPanel['ds'] < AirPassengersPanel['ds'].values[-12]].reset_index(drop=True)
AirPassengersPanel_test = AirPassengersPanel[AirPassengersPanel['ds'] >= AirPassengersPanel['ds'].values[-12]].reset_index(drop=True)
AirPassengersPanel_test['y'] = np.nan
AirPassengersPanel_test['y_[lag12]'] = np.nan
horizon = 12
input_size = 24
prediction_intervals = PredictionIntervals()
models = [NHITS(h=horizon, input_size=input_size, max_steps=100, loss=MAE(), scaler_type="robust"),
NHITS(h=horizon, input_size=input_size, max_steps=100, loss=DistributionLoss("Normal", level=[90]), scaler_type="robust")]
nf = NeuralForecast(models=models, freq='ME')
nf.fit(AirPassengersPanel_train, prediction_intervals=prediction_intervals)
preds = nf.predict(futr_df=AirPassengersPanel_test, level=[90])
fig, (ax1, ax2) = plt.subplots(2, 1, figsize = (20, 7))
plot_df = pd.concat([AirPassengersPanel_train, preds])
plot_df = plot_df[plot_df['unique_id']=='Airline1'].drop(['unique_id','trend','y_[lag12]'], axis=1).iloc[-50:]
ax1.plot(plot_df['ds'], plot_df['y'], c='black', label='True')
ax1.plot(plot_df['ds'], plot_df['NHITS'], c='blue', label='median')
ax1.fill_between(x=plot_df['ds'][-12:],
y1=plot_df['NHITS-lo-90'][-12:].values,
y2=plot_df['NHITS-hi-90'][-12:].values,
alpha=0.4, label='level 90')
ax1.set_title('AirPassengers Forecast - Uncertainty quantification using Conformal Prediction', fontsize=18)
ax1.set_ylabel('Monthly Passengers', fontsize=15)
ax1.set_xticklabels([])
ax1.legend(prop={'size': 10})
ax1.grid()
ax2.plot(plot_df['ds'], plot_df['y'], c='black', label='True')
ax2.plot(plot_df['ds'], plot_df['NHITS1'], c='blue', label='median')
ax2.fill_between(x=plot_df['ds'][-12:],
y1=plot_df['NHITS1-lo-90'][-12:].values,
y2=plot_df['NHITS1-hi-90'][-12:].values,
alpha=0.4, label='level 90')
ax2.set_title('AirPassengers Forecast - Uncertainty quantification using Normal distribution', fontsize=18)
ax2.set_ylabel('Monthly Passengers', fontsize=15)
ax2.set_xlabel('Timestamp [t]', fontsize=15)
ax2.legend(prop={'size': 10})
ax2.grid()
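
As a quick sanity check on the two bands, a sketch of their empirical coverage (column names are assumed to follow neuralforecast's default '<model>-lo-90' / '<model>-hi-90' suffixes, with the second model auto-named 'NHITS1'):

truth = AirPassengersPanel[AirPassengersPanel['ds'] >= AirPassengersPanel['ds'].values[-12]]
check = preds.merge(truth[['unique_id', 'ds', 'y']], on=['unique_id', 'ds'])
for m in ['NHITS', 'NHITS1']:
    covered = ((check['y'] >= check[f'{m}-lo-90']) & (check['y'] <= check[f'{m}-hi-90'])).mean()
    print(f'{m}: empirical coverage of the 90% band = {covered:.0%}')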

200
八个维度demo.py Normal file
View File

@ -0,0 +1,200 @@
import pandas as pd
from datasetsforecast.long_horizon import LongHorizon
# Change this to your own data to try the model
Y_df, _, _ = LongHorizon.load(directory='./', group='ETTm2')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])
# For this exercise we are going to take 20% of the dataset
n_time = len(Y_df.ds.unique())
val_size = int(.2 * n_time)
test_size = int(.2 * n_time)
print(Y_df.groupby('unique_id').head(2))  # preview the first rows of each series
import matplotlib.pyplot as plt
# We are going to plot the temperature of the transformer
# and marking the validation and train splits
u_id = 'HUFL'
x_plot = pd.to_datetime(Y_df[Y_df.unique_id==u_id].ds)
y_plot = Y_df[Y_df.unique_id==u_id].y.values
x_val = x_plot[n_time - val_size - test_size]
x_test = x_plot[n_time - test_size]
fig = plt.figure(figsize=(10, 5))
fig.tight_layout()
plt.plot(x_plot, y_plot)
plt.xlabel('Date', fontsize=17)
plt.ylabel('HUFL [15 min temperature]', fontsize=17)
plt.axvline(x_val, color='black', linestyle='-.')
plt.axvline(x_test, color='black', linestyle='-.')
plt.text(x_val, 5, ' Validation', fontsize=12)
plt.text(x_test, 5, ' Test', fontsize=12)
plt.grid()
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from neuralforecast.auto import AutoNHITS, AutoTFT, AutoTSMixer, AutoTSMixerx
from neuralforecast.core import NeuralForecast
from neuralforecast.models import NHITS, TSMixer, TSMixerx, MLPMultivariate
from neuralforecast.losses.pytorch import MAE
horizon = 96 # 24hrs = 4 * 15 min.
# Use your own config or AutoNHITS.default_config
nhits_config = {
"learning_rate": tune.choice([1e-3]), # Initial Learning rate
"max_steps": tune.choice([1000]), # Number of SGD steps
"input_size": tune.choice([5 * horizon]), # input_size = multiplier * horizon
"batch_size": tune.choice([7]), # Number of series in windows
"windows_batch_size": tune.choice([256]), # Number of windows in batch
"n_pool_kernel_size": tune.choice([[2, 2, 2], [16, 8, 1]]), # MaxPool's Kernelsize
"n_freq_downsample": tune.choice([[168, 24, 1], [24, 12, 1], [1, 1, 1]]), # Interpolation expressivity ratios
"activation": tune.choice(['ReLU']), # Type of non-linear activation
"n_blocks": tune.choice([[1, 1, 1]]), # Blocks per each 3 stacks
"mlp_units": tune.choice([[[512, 512], [512, 512], [512, 512]]]), # 2 512-Layers per block for each stack
"interpolation_mode": tune.choice(['linear']), # Type of multi-step interpolation
"val_check_steps": tune.choice([100]), # Compute validation every 100 epochs
"random_seed": tune.randint(1, 10),
}
tft_config = {
"input_size": tune.choice([horizon]),
"hidden_size": tune.choice([32]),
"n_head": tune.choice([2]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice(['robust', 'standard']),
"max_steps": tune.choice([500, 1000]),
"windows_batch_size": tune.choice([32]),
"check_val_every_n_epoch": tune.choice([100]),
"random_seed": tune.randint(1, 20),
}
# NOTE: input_size is not defined earlier in this file; 5 * horizon is an assumed
# default mirroring the multiplier used in nhits_config above, not a tuned value
input_size = 5 * horizon
tsmixer_config = {
"input_size": input_size, # Size of input window
"max_steps": tune.choice([500, 1000, 2000]), # Number of training iterations
"val_check_steps": 100, # Compute validation every x steps
"early_stop_patience_steps": 5, # Early stopping steps
"learning_rate": tune.loguniform(1e-4, 1e-2), # Initial Learning rate
"n_block": tune.choice([1, 2, 4, 6, 8]), # Number of mixing layers
"dropout": tune.uniform(0.0, 0.99), # Dropout
"ff_dim": tune.choice([32, 64, 128]), # Dimension of the feature linear layer
"scaler_type": 'identity',
}
tsmixerx_config = tsmixer_config.copy()
tsmixerx_config['futr_exog_list'] = ['ex_1', 'ex_2', 'ex_3', 'ex_4']
models = [AutoNHITS(h=horizon,
config=nhits_config,
num_samples=5),
AutoTFT(h=horizon,
loss=MAE(),
config=tft_config,
num_samples=3),
TSMixer(h=horizon,
input_size=input_size,
n_series=7,
max_steps=1000,
val_check_steps=100,
early_stop_patience_steps=5,
scaler_type='identity',
valid_loss=MAE(),
random_seed=12345678,
),
TSMixerx(h=horizon,
input_size=input_size,
n_series=7,
max_steps=1000,
val_check_steps=100,
early_stop_patience_steps=5,
scaler_type='identity',
dropout=0.7,
valid_loss=MAE(),
random_seed=12345678,
futr_exog_list=['ex_1', 'ex_2', 'ex_3', 'ex_4'],
),
MLPMultivariate(h=horizon,
input_size=input_size,
n_series=7,
max_steps=1000,
val_check_steps=100,
early_stop_patience_steps=5,
scaler_type='standard',
hidden_size=256,
valid_loss=MAE(),
random_seed=12345678,
),
NHITS(h=horizon,
input_size=horizon,
max_steps=1000,
val_check_steps=100,
early_stop_patience_steps=5,
scaler_type='robust',
valid_loss=MAE(),
random_seed=12345678,
),
AutoTSMixer(h=horizon,
n_series=7,
loss=MAE(),
config=tsmixer_config,
num_samples=10,
search_alg=HyperOptSearch(),
backend='ray',
valid_loss=MAE()) ,
AutoTSMixerx(h=horizon,
n_series=7,
loss=MAE(),
config=tsmixerx_config,
num_samples=10,
search_alg=HyperOptSearch(),
backend='ray',
valid_loss=MAE()) ]
nf = NeuralForecast(
models=models,
freq='15min')
Y_hat_df = nf.cross_validation(df=Y_df, val_size=val_size,
test_size=test_size, n_windows=None)
print(nf.models[0].results.get_best_result().config)  # best AutoNHITS configuration found
y_true = Y_hat_df.y.values
y_hat = Y_hat_df['AutoNHITS'].values
n_series = len(Y_df.unique_id.unique())
y_true = y_true.reshape(n_series, -1, horizon)
y_hat = y_hat.reshape(n_series, -1, horizon)
print('Parsed results')
print('2. y_true.shape (n_series, n_windows, n_time_out):\t', y_true.shape)
print('2. y_hat.shape (n_series, n_windows, n_time_out):\t', y_hat.shape)
fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(10, 11))
fig.tight_layout()
series = ['HUFL','HULL','LUFL','LULL','MUFL','MULL','OT']
series_idx = 3
for idx, w_idx in enumerate([200, 300, 400]):
    axs[idx].plot(y_true[series_idx, w_idx, :], label='True')
    axs[idx].plot(y_hat[series_idx, w_idx, :], label='Forecast')
    axs[idx].grid()
    axs[idx].set_ylabel(series[series_idx] + f' window {w_idx}', fontsize=17)
    if idx == 2:
        axs[idx].set_xlabel('Forecast Horizon', fontsize=17)
plt.legend()
plt.show()
plt.close()
from neuralforecast.losses.numpy import mae, mse
print('MAE: ', mae(y_hat, y_true))
print('MSE: ', mse(y_hat, y_true))
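
To compare every fitted model on the same cross-validation folds, a hedged sketch (guarded with a membership check, since the Auto* column names depend on which models actually ran):

candidate_cols = ['AutoNHITS', 'AutoTFT', 'TSMixer', 'TSMixerx',
                  'MLPMultivariate', 'NHITS', 'AutoTSMixer', 'AutoTSMixerx']
for col in candidate_cols:
    if col in Y_hat_df.columns:
        print(f'{col:16s} MAE: {mae(Y_hat_df[col].values, Y_hat_df["y"].values):.4f}')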

View File

@ -0,0 +1,14 @@
# Run the prediction job on a simple polling schedule
import time
from main import predict_main

while True:
    try:
        print(time.strftime('%H:%M'))
        # Only run on weekdays, at 18:00
        if time.strftime('%A') not in ['Saturday', 'Sunday'] and time.strftime('%H:%M') in ['18:00']:
            predict_main()
    except Exception as e:
        # Log the error and keep the loop alive instead of silently swallowing it
        print(e)
    time.sleep(60)
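
A hedged alternative to minute-by-minute polling is to compute the seconds until the next weekday 18:00 and sleep once; a sketch under the same assumption that predict_main is the entry point in main.py:

import datetime
import time
from main import predict_main

def seconds_until_next_run(hour=18, minute=0):
    # Find the next weekday occurrence of hour:minute strictly in the future
    now = datetime.datetime.now()
    run = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
    while run <= now or run.weekday() >= 5:  # skip past times and weekends
        run += datetime.timedelta(days=1)
    return (run - now).total_seconds()

while True:
    time.sleep(seconds_until_next_run())
    predict_main()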