py files
This commit is contained in:
parent
296db87abc
commit
c76cebdec6
55
aa copy.py
Normal file
@@ -0,0 +1,55 @@
# Count feature frequencies

# Read the file
import pandas as pd

df = pd.read_csv(r"D:\code\huarongqiming\碳酸锂合并数据.csv", encoding='gbk')
df['ds'] = pd.to_datetime(df['ds'])
# Sort by ds ascending, reset the index
df = df.sort_values(by='ds', ascending=True).reset_index(drop=True)

# Count feature frequencies:
# randomly sample 10 rows per column, compute the time gap between each sampled
# row and its successor, then keep the most frequent gap
columns = df.columns.to_list()
columns.remove('ds')
count_dict = {}
for column in columns:
    # Collect the time gaps of this column
    values = df[[column, 'ds']].copy()  # copy to avoid SettingWithCopyWarning
    values.dropna(inplace=True, axis=0)
    values = values.reset_index(drop=True)

    # Sample 10 rows
    value = values.sample(10)
    index = value.index
    next_index = index + 1
    count = []
    for i, j in zip(index, next_index):
        # Date difference between a row and the next one, via the index
        try:
            count.append((values.loc[j, 'ds'] - values.loc[i, 'ds']).days)
        except KeyError:
            pass  # the last row has no successor
    # Treat a 31-day gap as 30 days
    count = [30 if i == 31 else i for i in count]
    # Keep the most frequent gap in count
    count = max(set(count), key=count.count)
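    # e.g. count = [30, 30, 7] -> max(set(count), key=count.count) returns 30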
    # Store it in the dict
    count_dict[column] = count

df = pd.DataFrame(count_dict, index=['count']).T
pindu_dfs = pd.DataFrame()
# Group by count
# Print the feature-frequency statistics
pindudict = {'1':'日度','7':'周度','30':'月度','90':'季度','180':'半年度','365':'年度'}
for i in df.groupby('count'):
    # Take the index values of group i[1]
    index = i[1].index
    pindu_df = pd.DataFrame()
    pindu_df[pindudict[str(i[0])] + f'({len(i[1])})'] = index
    # Merge into pindu_dfs
    pindu_dfs = pd.concat([pindu_dfs, pindu_df], axis=1)
# Replace NaN with ''
pindu_dfs = pindu_dfs.fillna('')
pindu_dfs.to_csv(r'D:\code\huarongqiming\pindu.csv', index=False)
print(pindu_dfs)
print('*'*200)
10
aa.py
Normal file
@@ -0,0 +1,10 @@
# Run a command on a schedule
import os
import time

while True:
    print(time.strftime('%H:%M'))
    # Run only on weekdays, and only at 17:00 / 07:00 (only '07:00' is currently in the list)
    if time.strftime('%A') not in ['Saturday', 'Sunday'] and time.strftime('%H:%M') in ['07:00']:
        os.system('D:/ProgramData/anaconda3/python.exe main.py')
    time.sleep(60)
254
config_jingbo.py
Normal file
@@ -0,0 +1,254 @@
import logging
import os
import logging.handlers
import datetime


# eta API token
APPID = "XNLDvxZHHugj7wJ7"
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"

# eta API urls
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
edbdatapushurl = 'http://10.189.2.78:8108/v1/edb/push'
edbdeleteurl = 'http://10.189.2.78:8108/v1/edb/business/edb/del'
edbbusinessurl = 'http://10.189.2.78:8108/v1/edb/business/data/del'
edbcodelist = ['CO1 Comdty', 'ovx index', 'C2404194834', 'C2404199738', 'dxy curncy', 'C2403128043', 'C2403150124',
               'DOESCRUD Index', 'WTRBM1 EEGC Index', 'FVHCM1 INDEX', 'doedtprd index', 'CFFDQMMN INDEX',
               'C2403083739', 'C2404167878', 'C2403250571', 'lmcads03 lme comdty', 'GC1 COMB Comdty',
               'C2404171822', 'C2404167855']

# Column names hard-coded for now, matching the edbcodes above; to be revised later
edbnamelist = [
    'ds', 'y',
    'Brent c1-c6', 'Brent c1-c3', 'Brent-WTI', '美国商业原油库存',
    'DFL', '美国汽油裂解价差', 'ovx index', 'dxy curncy', 'lmcads03 lme comdty',
    'C2403128043', 'C2403150124', 'FVHCM1 INDEX', 'doedtprd index', 'CFFDQMMN INDEX',
    'C2403083739', 'C2404167878',
    'GC1 COMB Comdty', 'C2404167855'
]


# eta indicator codes for our own data, one per model
modelsindex = {
    'NHITS': 'SELF0000001',
    'Informer': 'SELF0000057',
    'LSTM': 'SELF0000058',
    'iTransformer': 'SELF0000059',
    'TSMixer': 'SELF0000060',
    'TSMixerx': 'SELF0000061',
    'PatchTST': 'SELF0000062',
    'RNN': 'SELF0000063',
    'GRU': 'SELF0000064',
    'TCN': 'SELF0000065',
    'BiTCN': 'SELF0000066',
    'DilatedRNN': 'SELF0000067',
    'MLP': 'SELF0000068',
    'DLinear': 'SELF0000069',
    'NLinear': 'SELF0000070',
    'TFT': 'SELF0000071',
    'FEDformer': 'SELF0000072',
    'StemGNN': 'SELF0000073',
    'MLPMultivariate': 'SELF0000074',
    'TiDE': 'SELF0000075',
    'DeepNPTS': 'SELF0000076'
}

# Request body for pushing prediction results to eta; the model index code and
# DataList are replaced when the request is actually sent
data = {
    "IndexCode": "",
    "IndexName": "价格预测模型",
    "Unit": "无",
    "Frequency": "日度",
    "SourceName": "价格预测",
    "Remark": 'ddd',
    "DataList": [
        {
            "Date": "2024-05-02",
            "Value": 333444
        }
    ]
}
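# Illustrative sketch (not part of the original commit): how this template is
# presumably filled and pushed for one model. Only edbdatapushurl and
# modelsindex come from this config; the request flow is an assumption.
# import requests
# data['IndexCode'] = modelsindex['NHITS']
# data['DataList'] = [{"Date": "2024-05-02", "Value": 78.5}]
# requests.post(edbdatapushurl, json=data)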

# eta classification
# Data can only be fetched at level 3, so every level-3 node under the
# energy & chemicals tree has to be located manually
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
# ParentId: 1160  能源化工 (energy & chemicals)
# ClassifyId: 1214  原油 (crude oil)
# ParentId: 1214 means all data under crude oil
ClassifyId = 1214


### Report upload configuration
# Variables -- production environment
# login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login"
# upload_url = "http://10.200.32.39/jingbo-api/api/analysis/reportInfo/researchUploadReportSave"

# login_data = {
#     "data": {
#         "account": "api_dev",
#         "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=",
#         "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
#         "terminal": "API"
#     },
#     "funcModule": "API",
#     "funcOperation": "获取token"
# }


# upload_data = {
#     "funcModule": '研究报告信息',
#     "funcOperation": '上传原油价格预测报告',
#     "data": {
#         "ownerAccount": '27663',  # report owner account; 27663 - 刘小朋
#         "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed to OIL_PRICE_FORECAST
#         "fileName": '',  # file name
#         "fileBase64": '',  # file content, base64-encoded
#         "categoryNo": 'yyjgycbg',  # research report category code
#         "smartBusinessClassCode": 'YCJGYCBG',  # analysis report category code
#         "reportEmployeeCode": "E40482",  # reporter; E40482 - admin, 0000027663 - 刘小朋
#         "reportDeptCode": "002000621000",  # department; 002000621000 - SH futures research
#         "productGroupCode": "RAW_MATERIAL"  # commodity category
#     }
# }


# Variables -- test environment
login_pushreport_url = "http://192.168.100.53:8080/jingbo-dev/api/server/login"
upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
# upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave"  # zhaoqiwei


login_data = {
    "data": {
        "account": "api_test",
        "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=",
        "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
        "terminal": "API"
    },
    "funcModule": "API",
    "funcOperation": "获取token"
}
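# Minimal sketch (illustrative): obtaining the token before an upload. That the
# token sits under "data" in the response body is an assumption.
# import requests
# token = requests.post(login_pushreport_url, json=login_data).json()["data"]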

upload_data = {
    "funcModule": '研究报告信息',
    "funcOperation": '上传原油价格预测报告',
    "data": {
        "ownerAccount": 'arui',  # report owner account
        "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed to OIL_PRICE_FORECAST
        "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf',  # file name
        "fileBase64": '',  # file content, base64-encoded
        "categoryNo": 'yyjgycbg',  # research report category code
        "smartBusinessClassCode": 'YCJGYCBG',  # analysis report category code
        "reportEmployeeCode": "E40116",  # reporter
        "reportDeptCode": "D0044",  # department
        "productGroupCode": "RAW_MATERIAL"  # commodity category
    }
}


### Production switches
# is_train = True           # whether to train
# is_debug = False          # whether to run in debug mode
# is_eta = True             # whether to use the eta API
# is_timefurture = True     # whether to use time features
# is_fivemodels = False     # whether to reuse the previously saved best 5 models
# is_edbcode = False        # features come from the edbcode list
# is_edbnamelist = False    # custom features, matching edbnamelist above
# is_update_eta = True      # push prediction results to eta
# is_update_report = True   # whether to upload the report

### Switches
is_train = True             # whether to train
is_debug = False            # whether to run in debug mode
is_eta = True               # whether to use the eta API
is_timefurture = True       # whether to use time features
is_fivemodels = False       # whether to reuse the previously saved best 5 models
is_edbcode = False          # features come from the edbcode list
is_edbnamelist = False      # custom features, matching edbnamelist above
is_update_eta = False       # push prediction results to eta
is_update_report = False    # whether to upload the report


# Data cutoff date
end_time = ''               # data cutoff date
delweekenday = True
is_corr = False             # whether to lead/lag-shift features to boost correlation
add_kdj = False             # whether to add KDJ indicators
if add_kdj and is_edbnamelist:
    edbnamelist = edbnamelist + ['K', 'D', 'J']

### Model parameters
y = 'Brent活跃合约'  # target variable of the crude-oil indicator data
# y = '期货结算价(连续):布伦特原油:前一个观测值'  # target variable for ineoil
horizon = 5          # forecast horizon
input_size = 40      # input sequence length
train_steps = 50 if is_debug else 1000  # training steps, caps the number of epochs
val_check_steps = 30           # validation frequency
early_stop_patience_steps = 5  # early-stopping patience

# --- cross-validation parameters
test_size = 200       # test-set size; defined as 100 at first, reassigned later
val_size = test_size  # validation-set size, same as the test set

### Feature-selection parameters
k = 100  # number of features to keep; 0, or a value larger than the feature count, means all features


### Files
data_set = '原油指标数据.xlsx'  # dataset file
# data_set = 'INE_OIL(1).csv'
### Folders
dataset = 'dataset'  # dataset folder

# Database name
db_name = os.path.join(dataset, 'jbsh_yuanyou.db')

settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'
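# With the values above this evaluates to
# '40-5-1000--100-原油指标数据.xlsx-Brent活跃合约'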
# Current date and time
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
reportname = f'Brent原油大模型预测--{now}.pdf'  # report file name
reportname = reportname.replace(':', '-')      # replace colons

### Email configuration
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
# recv = ['liurui_test@163.com','52585119@qq.com']
recv = ['liurui_test@163.com']
title = reportname  # was the string literal 'reportname'; the variable is clearly intended
content = 'brent价格预测报告请看附件'
file = os.path.join(dataset, reportname)  # was the string literal 'reportname' as well
# file = os.path.join(dataset,'14-7-50--100-原油指标数据.xlsx-Brent连1合约价格--20240731175936-预测报告.pdf')
ssl = True


### Logging configuration

# Create the log directory if it does not exist
log_dir = 'logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# Configure the logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.INFO)

# File handler: write logs to a rotating file
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

# Console handler: print logs to the console
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(message)s'))

# Attach both handlers to the logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# logger.info('当前配置:'+settings)
297
config_juxiting.py
Normal file
@@ -0,0 +1,297 @@
import logging
import os
import logging.handlers
import datetime


# eta API token
APPID = "XNLDvxZHHugj7wJ7"
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"

# eta API urls
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
edbdatapushurl = 'http://10.189.2.78:8108/v1/edb/push'
edbdeleteurl = 'http://10.189.2.78:8108/v1/edb/business/edb/del'
edbbusinessurl = 'http://10.189.2.78:8108/v1/edb/business/data/del'
edbcodelist = ['ID01385938', 'lmcads03 lme comdty',
               'GC1 COMB Comdty',
               'C2404171822',
               'dxy curncy',
               'S5443199 ',
               'S5479800',
               'S5443108',
               'H7358586',
               'LC3FM1 INDEX',
               'CNY REGN Curncy',
               's0105897',
               'M0067419',
               'M0066351',
               'S0266372',
               'S0266438',
               'S0266506']

# Column names hard-coded for now, matching the edbcodes above; to be revised later
edbnamelist = [
    'ds', 'y',
    'LME铜价',
    '黄金连1合约',
    'Brent-WTI',
    '美元指数',
    '甲醇鲁南价格',
    '甲醇太仓港口价格',
    '山东丙烯主流价',
    '丙烷(山东)',
    'FEI丙烷 M1',
    '在岸人民币汇率',
    '南华工业品指数',
    'PVC期货主力',
    'PE期货收盘价',
    'PP连续-1月',
    'PP连续-5月',
    'PP连续-9月',
]

edbcodenamedict = {
    'ID01385938': 'PP:拉丝:1102K:市场价:青州:国家能源宁煤(日)',
    'lmcads03 lme comdty': 'LME铜价',
    'GC1 COMB Comdty': '黄金连1合约',
    'C2404171822': 'Brent-WTI',
    'dxy curncy': '美元指数',
    'S5443199 ': '甲醇鲁南价格',
    'S5479800': '甲醇太仓港口价格',
    'S5443108': '山东丙烯主流价',
    'H7358586': '丙烷(山东)',
    'LC3FM1 INDEX': 'FEI丙烷 M1',
    'CNY REGN Curncy': '在岸人民币汇率',
    's0105897': '南华工业品指数',
    'M0067419': 'PVC期货主力',
    'M0066351': 'PE期货收盘价',
    'S0266372': 'PP连续-1月',
    'S0266438': 'PP连续-5月',
    'S0266506': 'PP连续-9月',
}

# eta indicator codes for our own data, one per model
modelsindex = {
    'NHITS': 'SELF0000001',
    'Informer': 'SELF0000057',
    'LSTM': 'SELF0000058',
    'iTransformer': 'SELF0000059',
    'TSMixer': 'SELF0000060',
    'TSMixerx': 'SELF0000061',
    'PatchTST': 'SELF0000062',
    'RNN': 'SELF0000063',
    'GRU': 'SELF0000064',
    'TCN': 'SELF0000065',
    'BiTCN': 'SELF0000066',
    'DilatedRNN': 'SELF0000067',
    'MLP': 'SELF0000068',
    'DLinear': 'SELF0000069',
    'NLinear': 'SELF0000070',
    'TFT': 'SELF0000071',
    'FEDformer': 'SELF0000072',
    'StemGNN': 'SELF0000073',
    'MLPMultivariate': 'SELF0000074',
    'TiDE': 'SELF0000075',
    'DeepNPTS': 'SELF0000076'
}

# Request body for pushing prediction results to eta; the model index code and
# DataList are replaced when the request is actually sent
data = {
    "IndexCode": "",
    "IndexName": "价格预测模型",
    "Unit": "无",
    "Frequency": "日度",
    "SourceName": "价格预测",
    "Remark": 'ddd',
    "DataList": [
        {
            "Date": "2024-05-02",
            "Value": 333444
        }
    ]
}

# eta classification
# Data can only be fetched at level 3, so every level-3 node under the
# energy & chemicals tree has to be located manually
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
# ParentId: 1160  能源化工 (energy & chemicals)
# ClassifyId: 1214  原油 (crude oil), 1161  PP
# ParentId: 1214 means all data under crude oil
ClassifyId = 1161


### Report upload configuration
# Variables -- production environment
login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login"
upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList"


login_data = {
    "data": {
        "account": "api_dev",
        "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=",
        "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
        "terminal": "API"
    },
    "funcModule": "API",
    "funcOperation": "获取token"
}


upload_data = {
    "funcModule": '研究报告信息',
    "funcOperation": '上传原油价格预测报告',
    "data": {
        "ownerAccount": '27663',  # report owner account; 27663 - 刘小朋
        "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed to OIL_PRICE_FORECAST
        "fileName": '',  # file name
        "fileBase64": '',  # file content, base64-encoded
        "categoryNo": 'yyjgycbg',  # research report category code
        "smartBusinessClassCode": 'YCJGYCBG',  # analysis report category code
        "reportEmployeeCode": "E40482",  # reporter; E40482 - admin, 0000027663 - 刘小朋
        "reportDeptCode": "002000621000",  # department; 002000621000 - SH futures research
        "productGroupCode": "RAW_MATERIAL"  # commodity category
    }
}


# # Variables -- test environment
# login_pushreport_url = "http://192.168.100.53:8080/jingbo-dev/api/server/login"
# upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
# # upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave"  # zhaoqiwei

# login_data = {
#     "data": {
#         "account": "api_test",
#         "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=",
#         "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
#         "terminal": "API"
#     },
#     "funcModule": "API",
#     "funcOperation": "获取token"
# }

# upload_data = {
#     "funcModule": '研究报告信息',
#     "funcOperation": '上传原油价格预测报告',
#     "data": {
#         "ownerAccount": 'arui',  # report owner account
#         "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed to OIL_PRICE_FORECAST
#         "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf',  # file name
#         "fileBase64": '',  # file content, base64-encoded
#         "categoryNo": 'yyjgycbg',  # research report category code
#         "smartBusinessClassCode": 'YCJGYCBG',  # analysis report category code
#         "reportEmployeeCode": "E40116",  # reporter
#         "reportDeptCode": "D0044",  # department
#         "productGroupCode": "RAW_MATERIAL"  # commodity category
#     }
# }


### Production switches
# is_train = True           # whether to train
# is_debug = False          # whether to run in debug mode
# is_eta = True             # whether to use the eta API
# is_timefurture = True     # whether to use time features
# is_fivemodels = False     # whether to reuse the previously saved best 5 models
# is_edbcode = False        # features come from the edbcode list
# is_edbnamelist = False    # custom features, matching edbnamelist above
# is_update_report = True   # whether to upload the report

### Switches
is_train = True             # whether to train
is_debug = False            # whether to run in debug mode
is_eta = True               # whether to use the eta API
is_timefurture = True       # whether to use time features
is_fivemodels = False       # whether to reuse the previously saved best 5 models
is_edbcode = False          # features come from the edbcode list
is_edbnamelist = False      # custom features, matching edbnamelist above
is_update_eta = False       # push prediction results to eta
is_update_report = False    # whether to upload the report

# Data cutoff date
end_time = ''               # data cutoff date
delweekenday = True
is_corr = False             # whether to lead/lag-shift features to boost correlation
add_kdj = False             # whether to add KDJ indicators
if add_kdj and is_edbnamelist:
    edbnamelist = edbnamelist + ['K', 'D', 'J']

### Model parameters
y = 'PP:拉丝:1102K:市场价:青州:国家能源宁煤(日)'  # target variable of the PP indicator data
# y = '期货结算价(连续):布伦特原油:前一个观测值'  # target variable for ineoil
horizon = 5          # forecast horizon
input_size = 40      # input sequence length
train_steps = 50 if is_debug else 1000  # training steps, caps the number of epochs
val_check_steps = 30           # validation frequency
early_stop_patience_steps = 5  # early-stopping patience

# --- cross-validation parameters
test_size = 200       # test-set size; defined as 100 at first, reassigned later
val_size = test_size  # validation-set size, same as the test set

### Feature-selection parameters
k = 100  # number of features to keep; 0, or a value larger than the feature count, means all features


### Files
data_set = 'PP指标数据.xlsx'  # dataset file
# data_set = 'INE_OIL(1).csv'
### Folders
dataset = 'dataset'  # dataset folder

# Database name
db_name = os.path.join(dataset, 'jbsh_juxiting.db')


settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'
# Current date and time
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
reportname = f'PP--{now}-预测报告.pdf'  # report file name
reportname = reportname.replace(':', '-')  # replace colons

### Email configuration
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
# recv = ['liurui_test@163.com','52585119@qq.com']
recv = ['liurui_test@163.com']
title = reportname
content = y + '预测报告请看附件'
file = os.path.join(dataset, reportname)
# file = os.path.join(dataset,'14-7-50--100-原油指标数据.xlsx-Brent连1合约价格--20240731175936-预测报告.pdf')
ssl = True


### Logging configuration

# Create the log directory if it does not exist
log_dir = 'logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# Configure the logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.INFO)

# File handler: write logs to a rotating file
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

# Console handler: print logs to the console
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(message)s'))

# Attach both handlers to the logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# logger.info('当前配置:'+settings)
106
config_tansuanli.py
Normal file
@@ -0,0 +1,106 @@
import logging
import os
import logging.handlers


# eta API token
APPID = "XNLDvxZHHugj7wJ7"
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"

# eta API urls
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
edbcodelist = ['CO1 Comdty', 'ovx index', 'C2404194834', 'C2404199738', 'dxy curncy', 'C2403128043', 'C2403150124',
               'DOESCRUD Index', 'WTRBM1 EEGC Index', 'FVHCM1 INDEX', 'doedtprd index', 'CFFDQMMN INDEX',
               'C2403083739', 'C2404167878', 'C2403250571', 'ovx index', 'lmcads03 lme comdty', 'GC1 COMB Comdty',
               'C2404171822']

# Column names hard-coded for now, matching the edbcodes above; to be revised later
edbnamelist = [
    'ds', 'y',
    'Brent c1-c6', 'Brent c1-c3', 'Brent-WTI', '美国商业原油库存',
    'DFL', '美国汽油裂解价差', 'ovx index', 'dxy curncy', 'lmcads03 lme comdty',
    'C2403128043', 'C2403150124', 'FVHCM1 INDEX', 'doedtprd index', 'CFFDQMMN INDEX',
    'C2403083739', 'C2404167878',
    # 'ovx index',
    'GC1 COMB Comdty'
]

# eta classification
# Data can only be fetched at level 3, so every level-3 node under the
# energy & chemicals tree has to be located manually
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
# ParentId: 1160  能源化工 (energy & chemicals)
# ClassifyId: 1214  原油 (crude oil)
# ParentId: 1214 means all data under crude oil
ClassifyId = 1214

### Switches
is_train = True         # whether to train
is_debug = True         # whether to run in debug mode
is_eta = False          # whether to use the eta API
is_timefurture = False  # whether to use time features
is_fivemodels = False   # whether to reuse the previously saved best 5 models
is_edbcode = False      # features come from the edbcode list
is_edbnamelist = False  # use the specified edbnamelist; affects feature selection, only relevant for the crude-oil dataset

# Data cutoff date
end_time = '2024-07-30'  # data cutoff date
delweekenday = True      # whether to drop weekend rows

### Model parameters
y = '电碳价格'   # target variable of the dataset
horizon = 5      # forecast horizon
input_size = 10  # input sequence length
train_steps = 10 if is_debug else 1000  # training steps, caps the number of epochs
val_check_steps = 30           # validation frequency
early_stop_patience_steps = 5  # early-stopping patience
### --- cross-validation parameters
test_size = 100       # test-set size; defined as 100, reassigned later
val_size = test_size  # validation-set size, same as the test set

### --- feature-selection parameters
k = 100  # number of features to keep; 0, or a value larger than the feature count, means all features

### --- files
data_set = '碳酸锂合并数据.csv'  # dataset file
### --- folders
dataset = 'dataset'  # dataset folder
settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'
import datetime
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')  # current date and time
reportname = f'{settings}--{now}-预测报告.pdf'  # report file name
reportname = reportname.replace(':', '-')      # replace colons

### Email configuration
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
# recv = ['liurui_test@163.com','52585119@qq.com']
recv = ['liurui_test@163.com']
title = reportname  # was the string literal 'reportname'; the variable is clearly intended
content = 'brent价格预测报告请看附件'  # note: copied from the Brent config, though this file targets 电碳价格
file = os.path.join(dataset, reportname)  # was the string literal 'reportname' as well
# file = os.path.join(dataset,'14-7-50--100-原油指标数据.xlsx-Brent连1合约价格--20240731175936-预测报告.pdf')
ssl = True


### --- logging configuration
# Create the log directory if it does not exist
log_dir = 'logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
# Configure the logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.INFO)
# File handler: write logs to a rotating file
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
# Console handler: print logs to the console
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(message)s'))
# Attach both handlers to the logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)
logger.info('当前配置:'+settings)
0
lib/__init__.py
Normal file
1391
lib/dataread.py
Normal file
File diff suppressed because it is too large
191
lib/duojinchengpredict.py
Normal file
@@ -0,0 +1,191 @@
import os
import multiprocessing
import time

import joblib
import pandas as pd
import torch


# Helper functions
def loadcsv(filename):
    try:
        df = pd.read_csv(filename, encoding='utf-8')
    except UnicodeDecodeError:
        df = pd.read_csv(filename, encoding='gbk')
    return df


def datachuli(df, datecol='date'):
    # Drop all-empty columns
    df = df.dropna(axis=1, how='all')
    # Forward fill, then backward fill (the original called .ffill/.bfill
    # without parentheses, which did nothing)
    df = df.ffill()
    df = df.bfill()
    # Rename the date column to ds
    df.rename(columns={datecol: 'ds'}, inplace=True)
    # Make ds a pd.datetime
    df['ds'] = pd.to_datetime(df['ds'])
    # Rename the target column
    df.rename(columns={'Brent连1合约价格': 'y'}, inplace=True)

    return df


def getdata(filename, datecol='date'):
    df = loadcsv(filename)
    df = datachuli(df, datecol)
    return df


# Prediction function
def predict(X_test, nf, result_list):
    df_predict = nf.predict(X_test).reset_index()
    result_list.append(df_predict.values.tolist())
    return df_predict


def testSetPredict(X_test, nf, columns, dataset):

    # Record the start time
    start_time = time.time()

    # Work out how many samples each process handles
    num_samples = len(X_test)
    num_processes = multiprocessing.cpu_count()
    samples_per_process = num_samples // num_processes

    manager = multiprocessing.Manager()
    result_list = manager.list()  # shared list
    # Create the process pool
    with multiprocessing.Pool(num_processes) as pool:
        processes = []
        for i in range(num_processes):
            # Index range this process handles
            start_index = i * samples_per_process
            end_index = (i + 1) * samples_per_process if i != num_processes - 1 else num_samples
            # Slice the data accordingly
            X_test_split = X_test[start_index:end_index]
            # Submit the tasks to the pool
            for X in X_test_split:
                processes.append(pool.apply_async(predict, args=(X, nf, result_list)))
        for process in processes:
            process.get()
    # Turn the shared list back into DataFrames
    df_combined = pd.DataFrame()
    df_combined2 = pd.DataFrame()
    for result in result_list:
        try:
            df_shared = pd.DataFrame(result, columns=['index', 'ds'] + columns)
            df_combined = pd.concat([df_combined, df_shared]).reset_index(drop=True)
        except ValueError:
            # Mismatched shapes (the DeepAR output) go into the second DataFrame
            df_shared2 = pd.DataFrame(result, columns=['index', 'ds'] + columns2)
            df_combined2 = pd.concat([df_combined2, df_shared2]).reset_index(drop=True)
    # df_combined.drop(['index'], axis=1, inplace=True)
    df_combined.to_csv(os.path.join(dataset, 'df_combined.csv'), index=False)
    # df_combined2.drop(['index'], axis=1, inplace=True)
    df_combined2.to_csv('df_combined.csv', index=False)
    end_time = time.time()
    # Print the run time
    print("运行时间:", end_time - start_time, "秒")


if __name__ == '__main__':
    # Record the start time
    start_time = time.time()

    # file = '指标数据处理.csv'
    file = 'brentpricepredict.csv'
    df = getdata(file)
    df.head()

    # Split features and label
    X = df.drop(['y', 'ds'], axis=1)  # features, excluding the timestamp and the label (Brent连1合约价格)
    y = df['y']  # label

    # End index of the training set: 80% of the data
    split_index = int(0.8 * df.shape[0])

    # Chronological train/test split
    df_train = df[:split_index].copy()
    df_test = df[split_index:].copy()
    df_train['unique_id'] = 1
    df_test['unique_id'] = 1

    df_combined = pd.DataFrame()
    df_test = df_test.reindex()
    # df_test = df_test[-20:]

    # Model list, used to name the prediction columns
    columns = [
        'NHITS',
        'Informer',
        'LSTM',
        'iTransformer',
        'TSMixer',
        'TSMixerx',
        'PatchTST',
        'RNN',
        'GRU',
        'TCN',
        'DeepAR',
        'BiTCN',
        'DilatedRNN',
        'MLP',
        'DLinear',
        'NLinear',
        'TFT',
        'FEDformer',
        'StemGNN',
        'MLPMultivariate',
        'TiDE',
        'DeepNPTS',
    ]

    # DeepAR's predictions carry five extra columns, so they are handled separately
    columns2 = [
        'NHITS',
        'Informer',
        'LSTM',
        'iTransformer',
        'TSMixer',
        'TSMixerx',
        'PatchTST',
        'RNN',
        'GRU',
        'TCN',
        'DeepAR',
        'DeepAR-median',
        'DeepAR-lo-90',
        'DeepAR-lo-80',
        'DeepAR-hi-80',
        'DeepAR-hi-90',
        'BiTCN',
        'DilatedRNN',
        'MLP',
        'DLinear',
        'NLinear',
        'TFT',
        'FEDformer',
        'StemGNN',
        'MLPMultivariate',
        'TiDE',
        'DeepNPTS',  # was 'DeepNPT', presumably a typo for the name used everywhere else
    ]

    input_size = 14
    X_test = []
    for i in range(0, len(df_test) - input_size + 1):
        X_test.append(df_test.iloc[i:i + input_size])

    nf = joblib.load('model_reg.joblib')

    # The original call omitted the dataset argument required by the signature;
    # 'dataset' matches the output folder used in the config files
    testSetPredict(X_test, nf, columns, 'dataset')
448
lib/tools.py
Normal file
@@ -0,0 +1,448 @@
import time
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from config_jingbo import logger
from sklearn import metrics
from scipy.stats import spearmanr  # used by plot_corr; missing from the original imports
import random, string, base64, hmac, hashlib
from reportlab.pdfbase import pdfmetrics  # font registration
from reportlab.pdfbase.ttfonts import TTFont  # font class
from reportlab.platypus import Table, SimpleDocTemplate, Paragraph, Image  # report content classes
from reportlab.lib.pagesizes import letter  # page size (8.5*inch, 11*inch)
from reportlab.lib.styles import getSampleStyleSheet  # text styles
from reportlab.lib import colors  # color module
from reportlab.graphics.charts.barcharts import VerticalBarChart  # chart class
from reportlab.graphics.charts.legends import Legend  # legend class
from reportlab.graphics.shapes import Drawing  # drawing tools
from reportlab.lib.units import cm  # unit: cm
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import sqlite3
import tkinter as tk
from tkinter import messagebox


def timeit(func):
    '''Timing decorator'''
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        execution_time = end_time - start_time
        logger.info(f"{func.__name__} 函数的执行时间为: {execution_time} 秒")
        return result
    return wrapper
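# Usage sketch (illustrative, not in the original commit): any function can be
# wrapped to log its wall-clock run time.
# @timeit
# def train_once():
#     time.sleep(1)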

class BinanceAPI:
    '''
    Build the Binance-style request-header signature for the eta API
    '''
    def __init__(self, APPID, SECRET):
        self.APPID = APPID
        self.SECRET = SECRET
        self.get_signature()

    # Generate a random string as the nonce
    def generate_nonce(self, length=32):
        self.nonce = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
        return self.nonce

    # Current timestamp in seconds
    def get_timestamp(self):
        return int(time.time())

    # Build the string to sign
    def build_sign_str(self):
        return f'appid={self.APPID}&nonce={self.nonce}&timestamp={self.timestamp}'

    # Compute the signature with HMAC SHA-256
    def calculate_signature(self, secret, message):
        return base64.urlsafe_b64encode(hmac.new(secret.encode('utf-8'), message.encode('utf-8'), hashlib.sha256).digest()).decode('utf-8')

    def get_signature(self):
        # Call the methods above to produce the signature
        self.nonce = self.generate_nonce()
        self.timestamp = self.get_timestamp()
        self.sign_str = self.build_sign_str()
        self.signature = self.calculate_signature(self.SECRET, self.sign_str)
        # return self.signature
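# Usage sketch (illustrative): construct once per request and read the computed
# fields; how eta expects them in the headers is an assumption.
# api = BinanceAPI(APPID, SECRET)
# print(api.nonce, api.timestamp, api.signature)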


class Graphs:
    '''
    PDF generation helpers
    '''
    # Draw the title
    @staticmethod
    def draw_title(title: str):
        # Get the full stylesheet
        style = getSampleStyleSheet()
        # Take the heading style
        ct = style['Heading1']
        # Set the style attributes
        ct.fontName = 'SimSun'       # font name
        ct.fontSize = 18             # font size
        ct.leading = 50              # line spacing
        ct.textColor = colors.green  # font color
        ct.alignment = 1             # centered
        ct.bold = True
        # Build and return the title paragraph
        return Paragraph(title, ct)

    # Draw a subtitle
    @staticmethod
    def draw_little_title(title: str):
        # Get the full stylesheet
        style = getSampleStyleSheet()
        # Take the base style
        ct = style['Normal']
        # Set the style attributes
        ct.fontName = 'SimSun'     # font name
        ct.fontSize = 15           # font size
        ct.leading = 30            # line spacing
        ct.textColor = colors.red  # font color
        # Build and return the subtitle paragraph
        return Paragraph(title, ct)

    # Draw a body paragraph
    @staticmethod
    def draw_text(text: str):
        # Get the full stylesheet
        style = getSampleStyleSheet()
        # Take the normal style
        ct = style['Normal']
        ct.fontName = 'SimSun'
        ct.fontSize = 12
        ct.wordWrap = 'CJK'      # enable CJK line wrapping
        ct.alignment = 0         # left-aligned
        ct.firstLineIndent = 32  # first-line indent
        ct.leading = 25
        return Paragraph(text, ct)

    # Draw a table
    @staticmethod
    def draw_table(col_width, *args):
        # Column widths come in as col_width
        style = [
            ('FONTNAME', (0, 0), (-1, -1), 'SimSun'),    # font
            ('FONTSIZE', (0, 0), (-1, 0), 10),           # font size of the first row
            ('FONTSIZE', (0, 1), (-1, -1), 8),           # font size of the remaining rows
            ('BACKGROUND', (0, 0), (-1, 0), '#d5dae6'),  # background of the first row
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),       # first row centered
            ('ALIGN', (0, 1), (-1, -1), 'LEFT'),         # remaining rows left-aligned
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),      # vertically centered everywhere
            ('TEXTCOLOR', (0, 0), (-1, -1), colors.darkslategray),  # text color
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),  # grey grid lines, width 0.5
            # ('SPAN', (0, 1), (0, 2)),  # merge rows 2-3 of the first column
            # ('SPAN', (0, 3), (0, 4)),  # merge rows 4-5 of the first column
            # ('SPAN', (0, 5), (0, 6)),  # merge rows 6-7 of the first column
            # ('SPAN', (0, 7), (0, 8)),  # merge rows 8-9 of the first column
        ]
        table = Table(args, colWidths=col_width, style=style)
        return table

    # Build a chart
    @staticmethod
    def draw_bar(bar_data: list, ax: list, items: list):
        drawing = Drawing(500, 250)
        bc = VerticalBarChart()
        bc.x = 45        # x coordinate of the chart
        bc.y = 45        # y coordinate of the chart
        bc.height = 200  # chart height
        bc.width = 350   # chart width
        bc.data = bar_data
        bc.strokeColor = colors.black  # color of the top and right axis lines
        bc.valueAxis.valueMin = 5000   # y-axis minimum
        bc.valueAxis.valueMax = 26000  # y-axis maximum
        bc.valueAxis.valueStep = 2000  # y-axis step
        bc.categoryAxis.labels.dx = 2
        bc.categoryAxis.labels.dy = -8
        bc.categoryAxis.labels.angle = 20
        bc.categoryAxis.categoryNames = ax

        # Legend
        leg = Legend()
        leg.fontName = 'SimSun'
        leg.alignment = 'right'
        leg.boxAnchor = 'ne'
        leg.x = 475  # x coordinate of the legend
        leg.y = 240
        leg.dxTextSpace = 10
        leg.columnMaximum = 3
        leg.colorNamePairs = items
        drawing.add(leg)
        drawing.add(bc)
        return drawing

    # Draw an image
    @staticmethod
    def draw_img(path):
        img = Image(path)       # read the image at the given path
        img.drawWidth = 20*cm   # image width
        img.drawHeight = 10*cm  # image height
        return img


# The evaluation metrics live in different libraries, so all formulas used are
# collected here

# MSE
def mse(y_true, y_pred):
    res_mse = metrics.mean_squared_error(y_true, y_pred)
    return res_mse

# RMSE
def rmse(y_true, y_pred):
    res_rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
    return res_rmse

# MAE
def mae(y_true, y_pred):
    res_mae = metrics.mean_absolute_error(y_true, y_pred)
    return res_mae

# sklearn has no MAPE or SMAPE, so they are implemented from the formulas
# MAPE
def mape(y_true, y_pred):
    res_mape = np.mean(np.abs((y_pred - y_true) / y_true)) * 100
    return res_mape

# SMAPE
def smape(y_true, y_pred):
    res_smape = 2.0 * np.mean(np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true))) * 100
    return res_smape
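# Worked example: y_true=[100], y_pred=[110] gives
# smape = 2*10/(110+100)*100 = 9.52..., versus mape = 10/100*100 = 10.0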

# Correlation plotting
def plot_corr(data, size=11):
    # Drop the ds column
    data.drop(columns=['ds'], inplace=True)

    # Empty DataFrame to hold the correlation coefficients
    correlation_df = pd.DataFrame(columns=['Feature', 'Correlation'])

    # Compute each feature's Pearson correlation with the target column and
    # store it in the new DataFrame
    for col in data.columns:
        if col != 'y':
            pearson_correlation = np.corrcoef(data[col], data['y'])[0, 1]
            spearman_correlation, _ = spearmanr(data[col], data['y'])
            new_row = {'Feature': col, 'Pearson_Correlation': round(pearson_correlation, 3), 'Spearman_Correlation': round(spearman_correlation, 2)}
            correlation_df = pd.concat([correlation_df, pd.DataFrame([new_row])], ignore_index=True)  # public pd.concat instead of the private _append used originally
    # Drop the empty column
    correlation_df.drop('Correlation', axis=1, inplace=True)
    correlation_df.dropna(inplace=True)
    correlation_df.to_csv('指标相关性分析.csv', index=False)

    data = correlation_df['Pearson_Correlation'].values  # keep as ndarray so the comparisons below work
    # 20 bins between -1 and 1
    bins = np.linspace(-1, 1, 21)
    # Count how many values fall in each bin
    hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]

    # Figure size
    plt.figure(figsize=(10, 6))
    # Histogram
    plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))

    # Title and axis labels
    plt.title('皮尔逊相关系数分布图')
    plt.xlabel('区间')
    plt.ylabel('统计数')
    plt.savefig('皮尔逊相关性系数.png')
    plt.close()


    # Figure size
    plt.figure(figsize=(10, 6))
    data = correlation_df['Spearman_Correlation'].values
    # Count how many values fall in each bin
    hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]

    # Histogram
    plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))

    # Title and axis labels
    plt.title('斯皮尔曼相关系数分布图')
    plt.xlabel('区间')
    plt.ylabel('统计数')
    plt.savefig('斯皮尔曼相关性系数.png')
    plt.close()


# Mail wrapper
class SendMail(object):
    def __init__(self, username, passwd, recv, title, content,
                 file=None, ssl=False,
                 email_host='smtp.qq.com', port=25, ssl_port=465):
        '''
        :param username: user name
        :param passwd: password
        :param recv: recipients; pass a list for several, e.g. ['a@qq.com','b@qq.com']
        :param title: mail subject
        :param content: mail body
        :param file: attachment path (absolute if not in the current directory); no attachment by default
        :param ssl: whether to use a secure connection; plain by default
        :param email_host: SMTP server address; defaults to the QQ server
        :param port: plain-connection port, default 25
        :param ssl_port: secure-connection port, default 465
        '''
        self.username = username      # user name
        self.passwd = passwd          # password
        self.recv = recv              # recipients; pass a list for several
        self.title = title            # mail subject
        self.content = content        # mail body
        self.file = file              # attachment path (absolute if not in the current directory)
        self.email_host = email_host  # SMTP server address
        self.port = port              # plain port
        self.ssl = ssl                # whether to use a secure connection
        self.ssl_port = ssl_port      # secure port

    def send_mail(self):
        msg = MIMEMultipart()
        # The message object
        if self.file:  # handle the attachment
            file_name = os.path.split(self.file)[-1]  # file name only, no path
            try:
                f = open(self.file, 'rb').read()
            except Exception as e:
                raise Exception('附件打不开!!!!')
            else:
                att = MIMEText(f, "base64", "utf-8")
                att["Content-Type"] = 'application/octet-stream'
                # base64.b64encode(file_name.encode()).decode()
                new_file_name = '=?utf-8?b?' + base64.b64encode(file_name.encode()).decode() + '?='
                # Required encoding for Chinese attachment file names
                att["Content-Disposition"] = 'attachment; filename="%s"' % (new_file_name)
                msg.attach(att)
        msg.attach(MIMEText(self.content))  # mail body
        msg['Subject'] = self.title         # mail subject
        msg['From'] = self.username         # sender account
        msg['To'] = ','.join(self.recv)     # recipient list
        if self.ssl:
            self.smtp = smtplib.SMTP_SSL(self.email_host, port=self.ssl_port)
        else:
            self.smtp = smtplib.SMTP(self.email_host, port=self.port)
        # The mail-server object
        self.smtp.login(self.username, self.passwd)
        try:
            self.smtp.sendmail(self.username, self.recv, msg.as_string())
            pass
        except Exception as e:
            print('出错了。。', e)
            logger.info(f'邮件服务出错了。。{e}')  # the original passed e as a stray second positional arg
        else:
            print('发送成功!')
            logger.info('邮件发送成功!')
        self.smtp.quit()


def dateConvert(df, datecol='ds'):
    # Convert the date column to datetime
    try:
        df[datecol] = pd.to_datetime(df[datecol], format=r'%Y-%m-%d')
    except ValueError:
        df[datecol] = pd.to_datetime(df[datecol], format=r'%Y/%m/%d')
    return df


class SQLiteHandler:
    def __init__(self, db_name):
        self.db_name = db_name
        self.connection = None
        self.cursor = None

    def connect(self):
        self.connection = sqlite3.connect(self.db_name)
        self.cursor = self.connection.cursor()

    def close(self):
        if self.connection:
            self.connection.close()
            self.connection = None
            self.cursor = None

    def execute_query(self, query, params=None):
        if params:
            return self.cursor.execute(query, params)
        else:
            return self.cursor.execute(query)

    def commit(self):
        self.connection.commit()

    def create_table(self, table_name, columns):
        query = f"CREATE TABLE IF NOT EXISTS {table_name} ({columns})"
        self.execute_query(query)
        self.commit()

    def insert_data(self, table_name, values, columns=None):
        if columns:
            placeholders = ', '.join(['?'] * len(values))
            query = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
        else:
            placeholders = ', '.join(['?'] * len(values))
            query = f"INSERT INTO {table_name} VALUES ({placeholders})"
        self.execute_query(query, values)
        self.commit()

    def select_data(self, table_name, columns=None, where_condition=None, order_by=None, limit=None):
        query = f"SELECT {', '.join(columns) if columns else '*'} FROM {table_name}"
        if where_condition:
            query += f" WHERE {where_condition}"
        if order_by:
            query += f" ORDER BY {order_by}"
        if limit:
            query += f" LIMIT {limit}"
        results = self.execute_query(query).fetchall()
        if results:
            headers = [description[0] for description in self.execute_query(query).description]
            return pd.DataFrame(results, columns=headers)
        else:
            return pd.DataFrame()

    def update_data(self, table_name, set_values, where_condition):
        query = f"UPDATE {table_name} SET {set_values} WHERE {where_condition}"
        logger.info('更新数据sql' + query)
        self.execute_query(query)
        self.commit()

    def delete_data(self, table_name, where_condition):
        query = f"DELETE FROM {table_name} WHERE {where_condition}"
        self.execute_query(query)
        self.commit()

    def check_table_exists(self, table_name):
        query = f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}'"
        result = self.execute_query(query).fetchone()
        return result is not None

    def add_column_if_not_exists(self, table_name, column_name, column_type):
        # Query the table schema
        query = f"PRAGMA table_info({table_name})"
        self.execute_query(query)
        columns = [column[1] for column in self.cursor.fetchall()]

        # Check whether the column exists
        if column_name not in columns:
            # Add the column if it does not
            query = f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
            self.execute_query(query)
            self.commit()
            print(f"Column '{column_name}' added to table '{table_name}' successfully.")
        else:
            print(f"Column '{column_name}' already exists in table '{table_name}'.")


if __name__ == '__main__':
    print('This is a tool, not a script.')
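    # Minimal usage sketch (illustrative, not in the original commit): exercise
    # SQLiteHandler against an in-memory database.
    db = SQLiteHandler(':memory:')
    db.connect()
    db.create_table('demo', 'ds TEXT, y REAL')
    db.insert_data('demo', ('2024-05-02', 78.5))
    print(db.select_data('demo'))
    db.close()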
176
main.py
Normal file
@@ -0,0 +1,176 @@
# Load the configuration
# from config_jingbo import *
# from config_tansuanli import *
from config_juxiting import *
from lib.dataread import *
from lib.tools import *
from models.nerulforcastmodels import ex_Model, model_losss, brent_export_pdf, tansuanli_export_pdf, pp_export_pdf

import glob
import torch
torch.set_float32_matmul_precision("high")

sqlitedb = SQLiteHandler(db_name)
sqlitedb.connect()

def predict_main():
    signature = BinanceAPI(APPID, SECRET)
    etadata = EtaReader(signature=signature,
                        classifylisturl=classifylisturl,
                        classifyidlisturl=classifyidlisturl,
                        edbcodedataurl=edbcodedataurl,
                        edbcodelist=edbcodelist,
                        edbdatapushurl=edbdatapushurl,
                        edbdeleteurl=edbdeleteurl,
                        edbbusinessurl=edbbusinessurl
                        )
    # Fetch the data
    if is_eta:
        # eta data
        logger.info('从eta获取数据...')
        signature = BinanceAPI(APPID, SECRET)
        etadata = EtaReader(signature=signature,
                            classifylisturl=classifylisturl,
                            classifyidlisturl=classifyidlisturl,
                            edbcodedataurl=edbcodedataurl,
                            edbcodelist=edbcodelist,
                            edbdatapushurl=edbdatapushurl,
                            edbdeleteurl=edbdeleteurl,
                            edbbusinessurl=edbbusinessurl,
                            )

        # df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set,dataset=dataset)  # raw data, unprocessed
        df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set, dataset=dataset)  # raw data, unprocessed

        # Process the data
        df = datachuli(df_zhibiaoshuju, df_zhibiaoliebiao, y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, end_time=end_time)

    else:
        logger.info('读取本地数据:' + os.path.join(dataset, data_set))
        df = getdata(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture, end_time=end_time)  # raw data, unprocessed

    # Rename the prediction column
    df.rename(columns={y: 'y'}, inplace=True)

    if is_edbnamelist:
        df = df[edbnamelist]
    df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False)
    # Save the latest y value to the database
    # Take the most recent row
    first_row = df[['ds', 'y']].tail(1)
    # Store the latest true value
    if not sqlitedb.check_table_exists('trueandpredict'):
        first_row.to_sql('trueandpredict', sqlitedb.connection, index=False)
    else:
        for row in first_row.itertuples(index=False):
            row_dict = row._asdict()
            check_query = sqlitedb.select_data('trueandpredict', where_condition=f"ds = '{row.ds}'")
            if len(check_query) > 0:
                set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
                sqlitedb.update_data('trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'")
                continue
            sqlitedb.insert_data('trueandpredict', tuple(row_dict.values()), columns=row_dict.keys())

    import datetime
    # Check the current weekday (note: despite the log message below, weekday() == 3 is Thursday, not Monday)
    is_weekday = datetime.datetime.now().weekday() == 3
    if is_weekday:
        logger.info('今天是周一,更新预测模型')
        # Find the model with the lowest prediction residual over the last 20 days

        model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="20")
        model_results = model_results.dropna()
        modelnames = model_results.columns.to_list()[2:]
        for col in model_results[modelnames].select_dtypes(include=['object']).columns:
            model_results[col] = model_results[col].astype(np.float32)
        # Absolute error rate of each prediction against the true value
        for model in modelnames:
            model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']

        # Smallest error rate in each row
        min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
        # Column name of the smallest error rate in each row
        min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
        # Map the column name back to the model name
        min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
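        # e.g. 'NHITS_abs_error_rate'.split('_')[0] -> 'NHITS'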
|
||||||
|
# 取出现次数最多的模型名称
|
||||||
|
most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
|
||||||
|
logger.info(f"最近20天预测残差最低的模型名称:{most_common_model}")
|
||||||
|
|
||||||
|
# 保存结果到数据库
|
||||||
|
|
||||||
|
if not sqlitedb.check_table_exists('most_model'):
|
||||||
|
sqlitedb.create_table('most_model',columns="ds datetime, most_common_model TEXT")
|
||||||
|
sqlitedb.insert_data('most_model',(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),most_common_model,),columns=('ds','most_common_model',))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if is_corr:
|
||||||
|
df = corr_feature(df=df)
|
||||||
|
|
||||||
|
df1 = df.copy() # 备份一下,后面特征筛选完之后加入ds y 列用
|
||||||
|
logger.info(f"开始训练模型...")
|
||||||
|
row,col = df.shape
|
||||||
|
|
||||||
|
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
|
||||||
|
# ex_Model(df,
|
||||||
|
# horizon=horizon,
|
||||||
|
# input_size=input_size,
|
||||||
|
# train_steps=train_steps,
|
||||||
|
# val_check_steps=val_check_steps,
|
||||||
|
# early_stop_patience_steps=early_stop_patience_steps,
|
||||||
|
# is_debug=is_debug,
|
||||||
|
# dataset=dataset,
|
||||||
|
# is_train=is_train,
|
||||||
|
# is_fivemodels=is_fivemodels,
|
||||||
|
# val_size=val_size,
|
||||||
|
# test_size=test_size,
|
||||||
|
# settings=settings,
|
||||||
|
# now=now,
|
||||||
|
# etadata = etadata,
|
||||||
|
# modelsindex = modelsindex,
|
||||||
|
# data = data,
|
||||||
|
# is_eta=is_eta,
|
||||||
|
# )
|
||||||
|
|
||||||
|
    # Model evaluation
    model_results3 = model_losss(sqlitedb)

    # Model report
    title = f'{settings}--{now}-预测报告'  # Report title
    brent_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
                     reportname=reportname, sqlitedb=sqlitedb)
    # pp_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
    #               reportname=reportname)
    logger.info('模型训练完成')
    # tansuanli_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, end_time=end_time, reportname=reportname)

    # LSTM univariate model
    # ex_Lstm(df, input_seq_len=input_size, output_seq_len=horizon, is_debug=is_debug, dataset=dataset)

    # LSTM multivariate model
    # ex_Lstm_M(df, n_days=input_size, out_days=horizon, is_debug=is_debug, datasetpath=dataset)

    # GRU model
    # ex_GRU(df)
    # Email the latest report
    m = SendMail(
        username=username,
        passwd=passwd,
        recv=recv,
        title=title,
        content=content,
        file=max(glob.glob(os.path.join(dataset, '*.pdf')), key=os.path.getctime),
        ssl=ssl,
    )
    # m.send_mail()


if __name__ == '__main__':
    predict_main()
123
maincanshu.py
Normal file
@ -0,0 +1,123 @@
# Load configuration
from config_jingbo import *
from lib.tools import *
from lib.dataread import *
from models.nerulforcastmodels import ex_Model, model_losss, brent_export_pdf
from models.lstmmodels import ex_Lstm_M, ex_Lstm
from models.grumodels import ex_GRU
import glob
import torch
torch.set_float32_matmul_precision("high")


if __name__ == '__main__':
    signature = BinanceAPI(APPID, SECRET)

    # Grid-search the training parameters
    input_size_list = [14]
    horizon_list = [7]
    train_steps_list = [500, 1000, 1500, 2000]
    k_list = [10, 18, 25, 50, 100]
    end_time_list = ['2024-07-03']
    is_debug = False
    is_fivemodels = False  # Whether to reuse the best 5 models saved earlier
    delweekenday = True
    # Iterate over every combination of the five parameter lists above
    # (see the itertools sketch below)
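    # Equivalent flattened grid via itertools.product (stdlib), shown as a
    # sketch; uncomment to use it in place of the nested index loops below:
    # from itertools import product
    # for input_size, horizon, train_steps, K, end_time in product(
    #         input_size_list, horizon_list, train_steps_list, k_list, end_time_list):
    #     ...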
    for i in range(len(input_size_list)):
        for h in range(len(horizon_list)):
            for j in range(len(train_steps_list)):
                for k in range(len(k_list)):
                    for end_time in end_time_list:
                        input_size = input_size_list[i]
                        horizon = horizon_list[h]
                        train_steps = train_steps_list[j]
                        K = k_list[k]
                        settings = f'{input_size}-{horizon}-{train_steps}-{K}-{data_set}-{end_time}-{y}'
                        logger.info(f'当前配置:{settings}')
                        # Fetch the data
                        if is_eta:
                            etadata = EtaReader(signature=signature,
                                                classifylisturl=classifylisturl,
                                                classifyidlisturl=classifyidlisturl,
                                                edbcodedataurl=edbcodedataurl,
                                                edbcodelist=edbcodelist
                                                )
                            df = etadata.get_eta_api_data(data_set=data_set, dataset=dataset)  # Raw, unprocessed data
                        else:
                            filename = os.path.join(dataset, data_set)
                            logger.info(f'未启用Eta数据,将读取本地数据{filename}')
                            df = pd.read_excel(filename, sheet_name='指标数据')

                        # Data preprocessing
                        df = datachuli(df=df, dataset=dataset, end_time=end_time, y=y, delweekenday=delweekenday)

                        if is_timefurture:
                            df = addtimecharacteristics(df=df, dataset=dataset)

                        # Rename the target column to 'y'
                        df.rename(columns={y: 'y'}, inplace=True)

                        logger.info("开始训练模型...")
                        row, col = df.shape
                        logger.info(f'当前配置:{settings}')
                        # Timestamp for this run, %Y-%m-%d-%H-%M-%S
                        from datetime import datetime
                        now = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
                        ex_Model(df,
                                 horizon=horizon,
                                 input_size=input_size,
                                 train_steps=train_steps,
                                 val_check_steps=val_check_steps,
                                 early_stop_patience_steps=early_stop_patience_steps,
                                 is_debug=is_debug,
                                 dataset=dataset,
                                 is_train=is_train,
                                 is_fivemodels=is_fivemodels,
                                 val_size=val_size,
                                 test_size=test_size,
                                 settings=settings,
                                 now=now
                                 )
                        # Model evaluation
                        model_results3 = model_losss(df, dataset=dataset, horizon=horizon)

                        # Model report
                        reportname = f'{settings}--{now}-预测报告.pdf'  # Report file name
                        reportname = reportname.replace(':', '-')  # Replace colons, which are invalid in file names
                        title = f'{settings}--{now}-预测报告'  # Report title
                        brent_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
                                         reportname=reportname)

                        # Email the report
                        m = SendMail(
                            username=username,
                            passwd=passwd,
                            recv=recv,
                            title=title,
                            content=content,
                            file=max(glob.glob(os.path.join(dataset, reportname)), key=os.path.getctime),
                            ssl=ssl,
                        )
                        # m.send_mail()
                        # LSTM univariate model
                        # ex_Lstm(df, input_seq_len=input_size, output_seq_len=horizon, is_debug=is_debug, dataset=dataset)

                        # LSTM multivariate model
                        # ex_Lstm_M(df, n_days=input_size, out_days=horizon, is_debug=is_debug, datasetpath=dataset)

                        # GRU model
                        # ex_GRU(df)

                        # Email sending (kept disabled)
                        # m = SendMail(
                        #     username=username,
                        #     passwd=passwd,
                        #     recv=recv,
                        #     title=title,
                        #     content=content,
                        #     file=max(glob.glob(os.path.join(dataset, '*.pdf')), key=os.path.getctime),
                        #     ssl=ssl,
                        # )
                        # m.send_mail()
164
models/grumodels.py
Normal file
@ -0,0 +1,164 @@
import os
import datetime
import random
import re
import string
import time
import base64
import hmac
import hashlib
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

mpl.rcParams['font.family'] = 'SimHei'  # Use SimHei so Chinese plot labels render

from hashlib import sha256
from hmac import HMAC
from math import sqrt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import GRU, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    '''
    Convert a time series into a supervised-learning dataset.
    :param data: dataset (list or 2-D array)
    :param n_in: input sequence length, default 1
    :param n_out: output sequence length, default 1
    :param dropnan: whether to drop rows containing NaN
    :return: the reframed DataFrame
    '''
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    # Shift the input block down by n_in, ..., 1 rows and collect each shift
    # (range(n_in, 0, -1) walks the lags in descending order)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    # Append the output block(s); i == 0 is the current step t
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # Concatenate the shifted blocks column-wise
    agg = pd.concat(cols, axis=1)
    # Name the columns
    agg.columns = names
    # Drop rows that contain NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

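# Shape sanity check for series_to_supervised on toy data (hypothetical;
# uncomment to run standalone):
# demo = pd.DataFrame({'a': range(10), 'b': range(10, 20)}).values
# print(series_to_supervised(demo, n_in=3, n_out=1).shape)  # (7, 8): 10-3 rows, (3+1)*2 cols
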
def ex_GRU(df):
    dataset = df.copy()
    dataset.set_index('ds', inplace=True)
    values = dataset.values

    # Scale features into the (0, 1) range
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)

    # Data preparation
    n_days = 14  # use the past 14 days of data
    n_features = scaled.shape[1]  # feature count follows the actual data
    reframed = series_to_supervised(scaled, n_days, 1)

    # Split into training and test sets
    values = reframed.values
    n_train_days = int(values.shape[0] * 0.8)
    train = values[:n_train_days, :]
    test = values[n_train_days:, :]

    # Inputs and outputs (the target is assumed to be the first column)
    n_obs = n_days * n_features
    train_X, train_y = train[:, :n_obs], train[:, -n_features]
    test_X, test_y = test[:, :n_obs], test[:, -n_features]

    # Reshape inputs to [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], n_days, n_features))
    test_X = test_X.reshape((test_X.shape[0], n_days, n_features))

    # Build the GRU model
    model = Sequential()
    model.add(GRU(50, return_sequences=True, input_shape=(n_days, n_features)))
    model.add(Dropout(0.2))
    model.add(GRU(50))
    model.add(Dropout(0.2))
    model.add(Dense(1))

    # Compile the model
    optimizer = Adam(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=optimizer)

    # Callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)

    # Train the model
    history = model.fit(train_X, train_y, epochs=100, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False, callbacks=[early_stopping])

    # Predict
    yhat = model.predict(test_X)
    test_X = test_X.reshape((test_X.shape[0], n_days * n_features))

    # Invert the scaling on the predicted and the true column
    inv_yhat = np.concatenate((yhat, test_X[:, -n_features+1:]), axis=1)
    inv_yhat = scaler.inverse_transform(inv_yhat)
    inv_yhat = inv_yhat[:, 0]

    test_y = test_y.reshape((len(test_y), 1))
    inv_y = np.concatenate((test_y, test_X[:, -n_features+1:]), axis=1)
    inv_y = scaler.inverse_transform(inv_y)
    inv_y = inv_y[:, 0]

    # RMSE
    rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
    print('Test RMSE: %.3f' % rmse)

    # Visualize the results
    n = 150
    time_axis_data = np.array(range(n))
    time_axis_future = np.array(range(n + 7))
    inv_y = inv_y[-n:]
    inv_yhat = inv_yhat[-n-7:]

    fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [5, 4]})
    fig.set_size_inches(8, 6)

    ax[0].plot(time_axis_data, inv_y, label='历史价格')
    ax[0].plot(time_axis_future, inv_yhat, linestyle='dashed', label='预测价格')
    ax[0].set_xlabel('时间')
    ax[0].set_ylabel('价格')
    ax[0].legend()
    ax[0].set_title('布伦特_多价格预测')
    ax[0].set_ylim(min(inv_y[-n - 7:]) * 0.4, max(inv_y[-n - 7:]) * 1.6)

    ax[1].axis('off')
    table_data = [[f"Day {i + 1}", "{:.2f}".format(val)] for i, val in enumerate(inv_yhat[-7:])]
    table = ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')
    table.auto_set_font_size(True)
    filename = os.path.basename(__file__).split('.')[0]

    plt.savefig(filename + '.png')
    plt.show()

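# Usage sketch (assumes 'ds' plus numeric columns with the target first,
# matching the column slicing above; the file name is hypothetical):
# ex_GRU(pd.read_csv('prices.csv', parse_dates=['ds']))
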
255
models/lstmmodels.py
Normal file
@ -0,0 +1,255 @@
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import os
import random
import string
import time
import base64
from hashlib import sha256
from hmac import HMAC
import requests
import csv
from numpy import concatenate
from math import sqrt

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    '''
    Convert a time series into a supervised-learning dataset.
    :param data: dataset (list or 2-D array)
    :param n_in: input sequence length, default 1
    :param n_out: output sequence length, default 1
    :param dropnan: whether to drop rows containing NaN
    :return: the reframed DataFrame
    '''
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    # Shift the input block down by n_in, ..., 1 rows and collect each shift
    # (range(n_in, 0, -1) walks the lags in descending order)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    # Append the output block(s); i == 0 is the current step t
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # Concatenate the shifted blocks column-wise
    agg = pd.concat(cols, axis=1)
    # Name the columns
    agg.columns = names
    # Drop rows that contain NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def createXY(dataset, n_past):
    dataX = []
    dataY = []
    print(dataset.shape[1])
    for i in range(n_past, len(dataset)):
        dataX.append(dataset[i - n_past:i, 0:dataset.shape[1]])
        dataY.append(dataset[i, 0])
    return np.array(dataX), np.array(dataY)

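# Sliding-window sanity check for createXY (toy array; uncomment to run):
# arr = np.arange(12.0).reshape(6, 2)   # 6 timesteps, 2 features
# X, y = createXY(arr, n_past=2)
# print(X.shape, y.shape)               # (4, 2, 2) (4,) — y is column 0 at step i
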
def ex_Lstm_M(df, n_days=14, out_days=7, is_debug=False, datasetpath=''):
    # dataset = pd.read_csv('brentpricepredict.csv', encoding='utf-8')
    dataset = df.copy()
    dataset.set_index('ds', inplace=True)

    values = dataset.values
    if is_debug:
        # values = values[-1000:]
        pass
    # Scale features into the (0, 1) range
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)
    # Predict 7 days from 14 days of data
    n_features = dataset.shape[1]
    # Build a 14 -> 7 supervised-learning dataset
    reframed = series_to_supervised(scaled, n_days, out_days)

    # Split the dataset
    values = reframed.values
    # 80% of the data for training, 20% for testing
    n_train = int(len(dataset) * 0.8)
    train = values[:n_train, :]
    test = values[n_train:, :]
    # Split inputs and outputs
    n_obs = n_days * n_features
    # The first feature at time t serves as Y
    train_X, train_y = train[:, :n_obs], train[:, -n_features]
    test_X, test_y = test[:, :n_obs], test[:, -n_features]
    # Reshape inputs to 3-D [samples, timesteps, features], timesteps = 14
    train_X = train_X.reshape((train_X.shape[0], n_days, n_features))
    test_X = test_X.reshape((test_X.shape[0], n_days, n_features))
    print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

    # Design the network
    model = Sequential()
    model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')
    # Fit the network
    history = model.fit(train_X, train_y, epochs=100, batch_size=72, validation_data=(test_X, test_y), verbose=2,
                        shuffle=False)

    # Predict
    yhat = model.predict(test_X)
    # Flatten the inputs back to 2-D
    test_X = test_X.reshape((test_X.shape[0], n_days * n_features))
    # Concatenate the predicted column with the remaining feature columns so the
    # shape matches what the scaler expects for the inverse transform
    inv_yhat = concatenate((yhat, test_X[:, -n_features+1:]), axis=1)
    # Invert the scaling
    inv_yhat = scaler.inverse_transform(inv_yhat)
    inv_yhat = inv_yhat[:, 0]
    print(inv_yhat)

    test_y = test_y.reshape((len(test_y), 1))
    # Same concatenation trick for the true values
    inv_y = concatenate((test_y, test_X[:, -n_features+1:]), axis=1)
    inv_y = scaler.inverse_transform(inv_y)
    inv_y = inv_y[:, 0]

    # RMSE
    rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
    print('Test RMSE: %.3f' % rmse)

    # Visualize the results
    # Keep n days of history
    n = len(inv_y) - 7
    # Values to plot
    time_axis_data = np.array(range(n))
    time_axis_future = np.array(range(n + 7))
    inv_y = inv_y[-n:]
    inv_yhat = inv_yhat[-n-7:]

    # Plot data and future predictions
    fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [5, 4]})
    # Canvas size
    fig.set_size_inches(6, 6)
    # First subplot: history and predictions
    ax[0].plot(time_axis_data, inv_y, label='历史价格')
    ax[0].plot(time_axis_future, inv_yhat, linestyle='dashed', label='预测价格')
    ax[0].set_xlabel('时间')
    ax[0].set_ylabel('价格')
    ax[0].legend()
    # Title
    ax[0].set_title('布伦特_多价格预测')
    # y-axis range
    ax[0].set_ylim(50, 120)

    # Second subplot: a table of the predicted prices
    ax[1].axis('off')
    table_data = [[f"Day {i + 1}", "{:.2f}".format(val)] for i, val in enumerate(inv_yhat[-7:])]
    table = ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')
    # Center the table contents
    table.auto_set_font_size(True)
    # Save the figure
    filename = os.path.basename(__file__).split('.')[0]

    plt.savefig(os.path.join(datasetpath, filename + '_M.png'))
    # plt.show()

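# Usage sketch for ex_Lstm_M (multivariate frame with 'ds' and the target
# as the first value column; the path is hypothetical):
# ex_Lstm_M(df, n_days=14, out_days=7, is_debug=True, datasetpath='dataset')
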
def ex_Lstm(df, input_seq_len=50, output_seq_len=7, is_debug=False, dataset=''):

    # Convert the date column to datetime (if it is not already)
    df['ds'] = pd.to_datetime(df['ds'])
    # Separate the numeric columns (excluding the date column)
    numeric_df = df.select_dtypes(include=['int64', 'float64'])

    prices = df
    # Remove any NaN values
    df = df.drop('ds', axis=1)
    prices = np.array(df, dtype=float)  # convert to NumPy array of floats
    # Note: this boolean indexing flattens the array, so multi-column
    # input collapses into a single 1-D series here
    prices = prices[~np.isnan(prices)]
    if is_debug:
        prices = prices[-300:]

    # Prepare input sequences
    inputs = []
    for i in range(len(prices)-input_seq_len-output_seq_len+1):
        inputs.append(prices[i:i+input_seq_len])
    inputs = np.array(inputs)

    # Prepare output sequences
    outputs = []
    for i in range(input_seq_len, len(prices)-output_seq_len+1):
        outputs.append(prices[i:i+output_seq_len])
    outputs = np.array(outputs)

    # Split dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=0.2)

    # Normalize data
    scaler_in = MinMaxScaler()
    X_train = scaler_in.fit_transform(X_train)
    X_test = scaler_in.transform(X_test)

    scaler_out = MinMaxScaler()
    y_train = scaler_out.fit_transform(y_train)
    y_test = scaler_out.transform(y_test)

    # Define LSTM model
    model = Sequential()
    model.add(LSTM(128, activation='relu', input_shape=(input_seq_len, 1)))
    model.add(Dense(output_seq_len))
    model.compile(optimizer='adam', loss='mse')

    # Train LSTM model
    model.fit(X_train.reshape(-1, input_seq_len, 1), y_train, epochs=100, batch_size=64, validation_data=(X_test.reshape(-1, input_seq_len, 1), y_test))

    # Evaluate LSTM model
    mse = model.evaluate(X_test.reshape(-1, input_seq_len, 1), y_test)

    # Make future predictions
    future_inputs = np.array([prices[-input_seq_len:]])
    future_inputs = scaler_in.transform(future_inputs)
    future_predictions = model.predict(future_inputs.reshape(-1, input_seq_len, 1))
    future_predictions = scaler_out.inverse_transform(future_predictions)[0]

    # Print results
    print("MSE: ", mse)
    print("Future predictions: ", future_predictions)

    # Generate time axis for data and future predictions
    time_axis_data = np.arange(len(prices))
    time_axis_future = np.arange(len(prices), len(prices) + len(future_predictions))

    # Concatenate time axis and data
    time_axis = np.concatenate((time_axis_data, time_axis_future))

    # Concatenate data and future predictions
    data_and_predictions = np.concatenate((prices, future_predictions))

    # Plot data and future predictions
    fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]})

    # First subplot: Data and Future Predictions
    ax[0].plot(time_axis, data_and_predictions, label='Data and Future Predictions')
    ax[0].plot(time_axis_future, future_predictions, linestyle='dashed', label='Future Predictions')
    ax[0].set_xlabel('Time')
    ax[0].set_ylabel('Price')
    ax[0].legend()

    # Second subplot: Table for Future Predictions
    ax[1].axis('off')
    table_data = [[f"Day {i+1}", "{:.2f}".format(val)] for i, val in enumerate(future_predictions)]
    table = ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')
    plt.savefig(os.path.join(dataset, 'lstmmodels.png'))
    # plt.show()

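# Usage sketch for ex_Lstm (effectively univariate — see the flattening note
# above; the column names are hypothetical):
# ex_Lstm(df[['ds', 'y']], input_seq_len=50, output_seq_len=7, is_debug=True, dataset='dataset')
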
1519
models/nerulforcastmodels.py
Normal file
File diff suppressed because it is too large
104
pushdata.py
Normal file
@ -0,0 +1,104 @@
# Load configuration
from config_jingbo import *
# from config_tansuanli import *
from lib.tools import *
from lib.dataread import *
from models.nerulforcastmodels import ex_Model, model_losss, brent_export_pdf, tansuanli_export_pdf
from models.lstmmodels import ex_Lstm_M, ex_Lstm
from models.grumodels import ex_GRU
import glob
import torch
torch.set_float32_matmul_precision("high")


if __name__ == '__main__':

    signature = BinanceAPI(APPID, SECRET)
    etadata = EtaReader(signature=signature,
                        classifylisturl=classifylisturl,
                        classifyidlisturl=classifyidlisturl,
                        edbcodedataurl=edbcodedataurl,
                        edbcodelist=edbcodelist,
                        edbdatapushurl=edbdatapushurl,
                        edbdeleteurl=edbdeleteurl,
                        edbbusinessurl=edbbusinessurl
                        )
    models = [
        'NHITS', 'Informer', 'LSTM', 'iTransformer', 'TSMixer', 'TSMixerx',
        'PatchTST', 'RNN', 'GRU', 'TCN', 'BiTCN', 'DilatedRNN', 'MLP',
        'DLinear', 'NLinear', 'TFT', 'FEDformer', 'StemGNN',
        'MLPMultivariate', 'TiDE', 'DeepNPT']

    # ETA custom-indicator codes, one per model
    modelsindex = {
        'NHITS': 'SELF0000001',
        'Informer': 'SELF0000057',
        'LSTM': 'SELF0000058',
        'iTransformer': 'SELF0000059',
        'TSMixer': 'SELF0000060',
        'TSMixerx': 'SELF0000061',
        'PatchTST': 'SELF0000062',
        'RNN': 'SELF0000063',
        'GRU': 'SELF0000064',
        'TCN': 'SELF0000065',
        'BiTCN': 'SELF0000066',
        'DilatedRNN': 'SELF0000067',
        'MLP': 'SELF0000068',
        'DLinear': 'SELF0000069',
        'NLinear': 'SELF0000070',
        'TFT': 'SELF0000071',
        'FEDformer': 'SELF0000072',
        'StemGNN': 'SELF0000073',
        'MLPMultivariate': 'SELF0000074',
        'TiDE': 'SELF0000075',
        'DeepNPT': 'SELF0000076'
    }
    # df_predict = pd.read_csv('dataset/predict.csv', encoding='gbk')
    # # df_predict.rename(columns={'ds': 'Date'}, inplace=True)
    # for m in modelsindex.keys():
    #     list = []
    #     for date, value in zip(df_predict['ds'], df_predict[m]):
    #         list.append({'Date': date, 'Value': value})
    #     data['DataList'] = list
    #     data['IndexCode'] = modelsindex[m]
    #     data['IndexName'] = f'价格预测{m}模型'
    #     data['Remark'] = m
    #     # print(data['DataList'])
    #     etadata.push_data(data)

    # Delete indicators
    # IndexCodeList = ['SELF0000055']
    # for i in range(1, 57):
    #     if i < 10: i = f'0{i}'
    #     IndexCodeList.append(f'SELF00000{i}')
    # print(IndexCodeList)
    # etadata.del_zhibiao(IndexCodeList)

    # Delete values within a specific date range
    indexcodelist = modelsindex.values()
    for indexcode in indexcodelist:
        data = {
            "IndexCode": indexcode,  # indicator code
            "StartDate": "2020-04-20",  # first date to delete (inclusive); equal StartDate/EndDate deletes that single day
            "EndDate": "2024-05-28"  # last date to delete (inclusive); equal StartDate/EndDate deletes that single day
        }

        # etadata.del_business(data)
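    # Hedged sketch of a single push payload, mirroring the commented loop
    # above (codes and values are placeholders):
    # data = {
    #     'IndexCode': 'SELF0000001',
    #     'IndexName': '价格预测NHITS模型',
    #     'Remark': 'NHITS',
    #     'DataList': [{'Date': '2024-05-27', 'Value': 81.3}],
    # }
    # etadata.push_data(data)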
62
八个维度demo copy.py
Normal file
@ -0,0 +1,62 @@
import logging
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS
from neuralforecast.utils import AirPassengersPanel
from mlforecast.utils import PredictionIntervals
from neuralforecast.losses.pytorch import DistributionLoss, MAE

os.environ['NIXTLA_ID_AS_COL'] = '1'


AirPassengersPanel_train = AirPassengersPanel[AirPassengersPanel['ds'] < AirPassengersPanel['ds'].values[-12]].reset_index(drop=True)
AirPassengersPanel_test = AirPassengersPanel[AirPassengersPanel['ds'] >= AirPassengersPanel['ds'].values[-12]].reset_index(drop=True)
AirPassengersPanel_test['y'] = np.nan
AirPassengersPanel_test['y_[lag12]'] = np.nan


horizon = 12
input_size = 24

prediction_intervals = PredictionIntervals()

models = [NHITS(h=horizon, input_size=input_size, max_steps=100, loss=MAE(), scaler_type="robust"),
          NHITS(h=horizon, input_size=input_size, max_steps=100, loss=DistributionLoss("Normal", level=[90]), scaler_type="robust")]
nf = NeuralForecast(models=models, freq='ME')
nf.fit(AirPassengersPanel_train, prediction_intervals=prediction_intervals)


preds = nf.predict(futr_df=AirPassengersPanel_test, level=[90])

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(20, 7))
plot_df = pd.concat([AirPassengersPanel_train, preds])

plot_df = plot_df[plot_df['unique_id']=='Airline1'].drop(['unique_id','trend','y_[lag12]'], axis=1).iloc[-50:]

ax1.plot(plot_df['ds'], plot_df['y'], c='black', label='True')
ax1.plot(plot_df['ds'], plot_df['NHITS'], c='blue', label='median')
ax1.fill_between(x=plot_df['ds'][-12:],
                 y1=plot_df['NHITS-lo-90'][-12:].values,
                 y2=plot_df['NHITS-hi-90'][-12:].values,
                 alpha=0.4, label='level 90')
ax1.set_title('AirPassengers Forecast - Uncertainty quantification using Conformal Prediction', fontsize=18)
ax1.set_ylabel('Monthly Passengers', fontsize=15)
ax1.set_xticklabels([])
ax1.legend(prop={'size': 10})
ax1.grid()

ax2.plot(plot_df['ds'], plot_df['y'], c='black', label='True')
ax2.plot(plot_df['ds'], plot_df['NHITS1'], c='blue', label='median')
ax2.fill_between(x=plot_df['ds'][-12:],
                 y1=plot_df['NHITS1-lo-90'][-12:].values,
                 y2=plot_df['NHITS1-hi-90'][-12:].values,
                 alpha=0.4, label='level 90')
ax2.set_title('AirPassengers Forecast - Uncertainty quantification using Normal distribution', fontsize=18)
ax2.set_ylabel('Monthly Passengers', fontsize=15)
ax2.set_xlabel('Timestamp [t]', fontsize=15)
ax2.legend(prop={'size': 10})
ax2.grid()
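# The script builds the figure but never displays or saves it; a minimal
# finishing step (the output file name is an assumption):
plt.savefig('airpassengers_conformal_vs_normal.png')
plt.show()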
200
八个维度demo.py
Normal file
@ -0,0 +1,200 @@
import pandas as pd
from datasetsforecast.long_horizon import LongHorizon

# Change this to your own data to try the model
Y_df, _, _ = LongHorizon.load(directory='./', group='ETTm2')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

# For this exercise we take 20% of the dataset for validation and 20% for test
n_time = len(Y_df.ds.unique())
val_size = int(.2 * n_time)
test_size = int(.2 * n_time)

Y_df.groupby('unique_id').head(2)

import matplotlib.pyplot as plt

# Plot the transformer temperature series, marking the
# validation and test splits
u_id = 'HUFL'
x_plot = pd.to_datetime(Y_df[Y_df.unique_id==u_id].ds)
y_plot = Y_df[Y_df.unique_id==u_id].y.values

x_val = x_plot[n_time - val_size - test_size]
x_test = x_plot[n_time - test_size]

fig = plt.figure(figsize=(10, 5))
fig.tight_layout()

plt.plot(x_plot, y_plot)
plt.xlabel('Date', fontsize=17)
plt.ylabel('HUFL [15 min temperature]', fontsize=17)

plt.axvline(x_val, color='black', linestyle='-.')
plt.axvline(x_test, color='black', linestyle='-.')
plt.text(x_val, 5, ' Validation', fontsize=12)
plt.text(x_test, 5, ' Test', fontsize=12)

plt.grid()

from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from neuralforecast.auto import AutoNHITS, AutoTFT, AutoTSMixer, AutoTSMixerx
from neuralforecast.models import NHITS, TSMixer, TSMixerx, MLPMultivariate
from neuralforecast.losses.pytorch import MAE
from neuralforecast.core import NeuralForecast

horizon = 96  # 24hrs = 4 * 15 min.
# `input_size` is referenced below without being defined; 5 * horizon is
# assumed here, matching the AutoNHITS search space
input_size = 5 * horizon

# Use your own config or AutoNHITS.default_config
nhits_config = {
    "learning_rate": tune.choice([1e-3]),                                      # Initial learning rate
    "max_steps": tune.choice([1000]),                                          # Number of SGD steps
    "input_size": tune.choice([5 * horizon]),                                  # input_size = multiplier * horizon
    "batch_size": tune.choice([7]),                                            # Number of series in windows
    "windows_batch_size": tune.choice([256]),                                  # Number of windows in batch
    "n_pool_kernel_size": tune.choice([[2, 2, 2], [16, 8, 1]]),                # MaxPool's kernel size
    "n_freq_downsample": tune.choice([[168, 24, 1], [24, 12, 1], [1, 1, 1]]),  # Interpolation expressivity ratios
    "activation": tune.choice(['ReLU']),                                       # Type of non-linear activation
    "n_blocks": tune.choice([[1, 1, 1]]),                                      # Blocks per each of the 3 stacks
    "mlp_units": tune.choice([[[512, 512], [512, 512], [512, 512]]]),          # Two 512-unit layers per block in each stack
    "interpolation_mode": tune.choice(['linear']),                             # Type of multi-step interpolation
    "val_check_steps": tune.choice([100]),                                     # Compute validation every 100 steps
    "random_seed": tune.randint(1, 10),
}

tft_config = {
    "input_size": tune.choice([horizon]),
    "hidden_size": tune.choice([32]),
    "n_head": tune.choice([2]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice(['robust', 'standard']),
    "max_steps": tune.choice([500, 1000]),
    "windows_batch_size": tune.choice([32]),
    "check_val_every_n_epoch": tune.choice([100]),
    "random_seed": tune.randint(1, 20),
}

tsmixer_config = {
    "input_size": input_size,                      # Size of input window
    "max_steps": tune.choice([500, 1000, 2000]),   # Number of training iterations
    "val_check_steps": 100,                        # Compute validation every x steps
    "early_stop_patience_steps": 5,                # Early stopping steps
    "learning_rate": tune.loguniform(1e-4, 1e-2),  # Initial learning rate
    "n_block": tune.choice([1, 2, 4, 6, 8]),       # Number of mixing layers
    "dropout": tune.uniform(0.0, 0.99),            # Dropout
    "ff_dim": tune.choice([32, 64, 128]),          # Dimension of the feature linear layer
    "scaler_type": 'identity',
}

tsmixerx_config = tsmixer_config.copy()
tsmixerx_config['futr_exog_list'] = ['ex_1', 'ex_2', 'ex_3', 'ex_4']

models = [AutoNHITS(h=horizon,
                    config=nhits_config,
                    num_samples=5),
          AutoTFT(h=horizon,
                  loss=MAE(),
                  config=tft_config,
                  num_samples=3),
          TSMixer(h=horizon,
                  input_size=input_size,
                  n_series=7,
                  max_steps=1000,
                  val_check_steps=100,
                  early_stop_patience_steps=5,
                  scaler_type='identity',
                  valid_loss=MAE(),
                  random_seed=12345678,
                  ),
          TSMixerx(h=horizon,
                   input_size=input_size,
                   n_series=7,
                   max_steps=1000,
                   val_check_steps=100,
                   early_stop_patience_steps=5,
                   scaler_type='identity',
                   dropout=0.7,
                   valid_loss=MAE(),
                   random_seed=12345678,
                   futr_exog_list=['ex_1', 'ex_2', 'ex_3', 'ex_4'],
                   ),
          MLPMultivariate(h=horizon,
                          input_size=input_size,
                          n_series=7,
                          max_steps=1000,
                          val_check_steps=100,
                          early_stop_patience_steps=5,
                          scaler_type='standard',
                          hidden_size=256,
                          valid_loss=MAE(),
                          random_seed=12345678,
                          ),
          NHITS(h=horizon,
                input_size=horizon,
                max_steps=1000,
                val_check_steps=100,
                early_stop_patience_steps=5,
                scaler_type='robust',
                valid_loss=MAE(),
                random_seed=12345678,
                ),
          AutoTSMixer(h=horizon,
                      n_series=7,
                      loss=MAE(),
                      config=tsmixer_config,
                      num_samples=10,
                      search_alg=HyperOptSearch(),
                      backend='ray',
                      valid_loss=MAE()),
          AutoTSMixerx(h=horizon,
                       n_series=7,
                       loss=MAE(),
                       config=tsmixerx_config,
                       num_samples=10,
                       search_alg=HyperOptSearch(),
                       backend='ray',
                       valid_loss=MAE())]

nf = NeuralForecast(
    models=models,
    freq='15min')

Y_hat_df = nf.cross_validation(df=Y_df, val_size=val_size,
                               test_size=test_size, n_windows=None)
nf.models[0].results.get_best_result().config
y_true = Y_hat_df.y.values
y_hat = Y_hat_df['AutoNHITS'].values

n_series = len(Y_df.unique_id.unique())

y_true = y_true.reshape(n_series, -1, horizon)
y_hat = y_hat.reshape(n_series, -1, horizon)

print('Parsed results')
print('y_true.shape (n_series, n_windows, n_time_out):\t', y_true.shape)
print('y_hat.shape (n_series, n_windows, n_time_out):\t', y_hat.shape)


fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(10, 11))
fig.tight_layout()

series = ['HUFL', 'HULL', 'LUFL', 'LULL', 'MUFL', 'MULL', 'OT']
series_idx = 3

for idx, w_idx in enumerate([200, 300, 400]):
    axs[idx].plot(y_true[series_idx, w_idx, :], label='True')
    axs[idx].plot(y_hat[series_idx, w_idx, :], label='Forecast')
    axs[idx].grid()
    axs[idx].set_ylabel(series[series_idx] + f' window {w_idx}',
                        fontsize=17)
    if idx == 2:
        axs[idx].set_xlabel('Forecast Horizon', fontsize=17)
plt.legend()
plt.show()
plt.close()


from neuralforecast.losses.numpy import mae, mse

print('MAE: ', mae(y_hat, y_true))
print('MSE: ', mse(y_hat, y_true))
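# A hedged extension: per-series MAE over all windows, reusing the arrays
# reshaped above (the series names were defined for the plots):
for s_idx, s_name in enumerate(series):
    print(f'{s_name} MAE:', mae(y_hat[s_idx], y_true[s_idx]))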
14
原油预测定时任务,请勿关闭.py
Normal file
@ -0,0 +1,14 @@
# Run the prediction job on a schedule
import os
import time
from main import predict_main

while True:
    try:
        print(time.strftime('%H:%M'))
        # Only run on workdays, and only at 18:00
        if time.strftime('%A') not in ['Saturday', 'Sunday'] and time.strftime('%H:%M') in ['18:00']:
            predict_main()
        time.sleep(60)
    except Exception as e:
        print(e)