石油焦铝用日度预测调试完成
This commit is contained in:
parent
313e9e229d
commit
5191ff7c69
@ -18,10 +18,22 @@
|
||||
"import numpy as np\n",
|
||||
"# 变量定义\n",
|
||||
"login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n",
|
||||
"search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n",
|
||||
"# query_data_list_item_nos_url\n",
|
||||
"search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\" #jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos\n",
|
||||
"upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"query_data_list_item_nos_data = {\n",
|
||||
" \"funcModule\": \"数据项\",\n",
|
||||
" \"funcOperation\": \"查询\",\n",
|
||||
" \"data\": {\n",
|
||||
" \"dateStart\": \"20200101\",\n",
|
||||
" \"dateEnd\": \"20241231\",\n",
|
||||
" \"dataItemNoList\": [\"Brentzdj\", \"Brentzgj\"] # 数据项编码,代表 brent最低价和最高价\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"login_data = {\n",
|
||||
" \"data\": {\n",
|
||||
" \"account\": \"api_dev\",\n",
|
||||
@ -844,7 +856,7 @@
|
||||
" # headers1 = {\"Authorization\": token_push}\n",
|
||||
" # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5))\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@ -960,8 +972,6 @@
|
||||
" # 保存新的xls文件\n",
|
||||
" new_workbook.save(\"定性模型数据项12-11.xlsx\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
|
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
BIN
aisenzhecode/液化石油气/化工品价格预测准确率.xlsx
Normal file
BIN
aisenzhecode/液化石油气/化工品价格预测准确率.xlsx
Normal file
Binary file not shown.
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
320
config_shiyoujiao.py
Normal file
320
config_shiyoujiao.py
Normal file
@ -0,0 +1,320 @@
|
||||
import logging
|
||||
import os
|
||||
import logging.handlers
|
||||
import datetime
|
||||
from lib.tools import MySQLDB,SQLiteHandler
|
||||
|
||||
|
||||
# eta 接口token
|
||||
APPID = "XNLDvxZHHugj7wJ7"
|
||||
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"
|
||||
|
||||
# eta 接口url
|
||||
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
|
||||
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
|
||||
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
|
||||
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
|
||||
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
|
||||
edbdatapushurl = 'http://10.189.2.78:8108/v1/edb/push'
|
||||
edbdeleteurl = 'http://10.189.2.78:8108/v1/edb/business/edb/del'
|
||||
edbbusinessurl = 'http://10.189.2.78:8108/v1/edb/business/data/del'
|
||||
edbcodelist = ['ID01385938','lmcads03 lme comdty',
|
||||
'GC1 COMB Comdty',
|
||||
'C2404171822',
|
||||
'dxy curncy',
|
||||
'S5443199 ',
|
||||
'S5479800',
|
||||
'S5443108',
|
||||
'H7358586',
|
||||
'LC3FM1 INDEX',
|
||||
'CNY REGN Curncy',
|
||||
's0105897',
|
||||
'M0067419',
|
||||
'M0066351',
|
||||
'S0266372',
|
||||
'S0266438',
|
||||
'S0266506',
|
||||
'ID01384463']
|
||||
|
||||
# 临时写死用指定的列,与上面的edbcode对应,后面更改
|
||||
edbnamelist = [
|
||||
'ds','y',
|
||||
'LME铜价',
|
||||
'黄金连1合约',
|
||||
'Brent-WTI',
|
||||
'美元指数',
|
||||
'甲醇鲁南价格',
|
||||
'甲醇太仓港口价格',
|
||||
'山东丙烯主流价',
|
||||
'丙烷(山东)',
|
||||
'FEI丙烷 M1',
|
||||
'在岸人民币汇率',
|
||||
'南华工业品指数',
|
||||
'PVC期货主力',
|
||||
'PE期货收盘价',
|
||||
'PP连续-1月',
|
||||
'PP连续-5月',
|
||||
'PP连续-9月',
|
||||
'PP:拉丝:L5E89:出厂价:华北(第二区域):内蒙古久泰新材料(日)'
|
||||
]
|
||||
|
||||
edbcodenamedict = {
|
||||
'ID01385938':'PP:拉丝:1102K:市场价:青州:国家能源宁煤(日)',
|
||||
'ID01384463':'PP:拉丝:L5E89:出厂价:华北(第二区域):内蒙古久泰新材料(日)',
|
||||
'lmcads03 lme comdty':'LME铜价',
|
||||
'GC1 COMB Comdty':'黄金连1合约',
|
||||
'C2404171822':'Brent-WTI',
|
||||
'dxy curncy':'美元指数',
|
||||
'S5443199 ':'甲醇鲁南价格',
|
||||
'S5479800':'甲醇太仓港口价格',
|
||||
'S5443108':'山东丙烯主流价',
|
||||
'H7358586':'丙烷(山东)',
|
||||
'LC3FM1 INDEX':'FEI丙烷 M1',
|
||||
'CNY REGN Curncy':'在岸人民币汇率',
|
||||
's0105897':'南华工业品指数',
|
||||
'M0067419':'PVC期货主力',
|
||||
'M0066351':'PE期货收盘价',
|
||||
'S0266372':'PP连续-1月',
|
||||
'S0266438':'PP连续-5月',
|
||||
'S0266506':'PP连续-9月',
|
||||
|
||||
}
|
||||
|
||||
# eta自有数据指标编码
|
||||
modelsindex = {
|
||||
'NHITS': 'SELF0000077',
|
||||
'Informer':'SELF0000078',
|
||||
'LSTM':'SELF0000079',
|
||||
'iTransformer':'SELF0000080',
|
||||
'TSMixer':'SELF0000081',
|
||||
'TSMixerx':'SELF0000082',
|
||||
'PatchTST':'SELF0000083',
|
||||
'RNN':'SELF0000084',
|
||||
'GRU':'SELF0000085',
|
||||
'TCN':'SELF0000086',
|
||||
'BiTCN':'SELF0000087',
|
||||
'DilatedRNN':'SELF0000088',
|
||||
'MLP':'SELF0000089',
|
||||
'DLinear':'SELF0000090',
|
||||
'NLinear':'SELF0000091',
|
||||
'TFT':'SELF0000092',
|
||||
'FEDformer':'SELF0000093',
|
||||
'StemGNN':'SELF0000094',
|
||||
'MLPMultivariate':'SELF0000095',
|
||||
'TiDE':'SELF0000096',
|
||||
'DeepNPTS':'SELF0000097'
|
||||
}
|
||||
|
||||
|
||||
|
||||
# eta 上传预测结果的请求体,后面发起请求的时候更改 model datalist 数据
|
||||
data = {
|
||||
"IndexCode": "",
|
||||
"IndexName": "价格预测模型",
|
||||
"Unit": "无",
|
||||
"Frequency": "日度",
|
||||
"SourceName": f"价格预测",
|
||||
"Remark": 'ddd',
|
||||
"DataList": [
|
||||
{
|
||||
"Date": "2024-05-02",
|
||||
"Value": 333444
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# eta 分类
|
||||
# level:3才可以获取到数据,所以需要人工把能源化工下所有的level3级都找到
|
||||
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
|
||||
#ParentId ":1160, 能源化工
|
||||
# ClassifyId ":1214,原油 3912 石油焦
|
||||
#ParentId ":1214,",就是原油下所有的数据。
|
||||
ClassifyId = 3707
|
||||
|
||||
|
||||
|
||||
############################################################################################################### 变量定义--测试环境
|
||||
server_host = '192.168.100.53'
|
||||
|
||||
login_pushreport_url = f"http://{server_host}:8080/jingbo-dev/api/server/login"
|
||||
upload_url = f"http://{server_host}:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
|
||||
upload_warning_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save"
|
||||
query_data_list_item_nos_url = f"http://{server_host}:8080/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos"
|
||||
|
||||
login_data = {
|
||||
"data": {
|
||||
"account": "api_test",
|
||||
# "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=", # Shihua@123456
|
||||
"password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", # 123456
|
||||
"tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
|
||||
"terminal": "API"
|
||||
},
|
||||
"funcModule": "API",
|
||||
"funcOperation": "获取token"
|
||||
}
|
||||
|
||||
upload_data = {
|
||||
"funcModule":'研究报告信息',
|
||||
"funcOperation":'上传聚烯烃PP价格预测报告',
|
||||
"data":{
|
||||
"groupNo":'000128', # 用户组编号
|
||||
"ownerAccount":'arui', #报告所属用户账号
|
||||
"reportType":'OIL_PRICE_FORECAST', # 报告类型,固定为OIL_PRICE_FORECAST
|
||||
"fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf', #文件名称
|
||||
"fileBase64": '' ,#文件内容base64
|
||||
"categoryNo":'yyjgycbg', # 研究报告分类编码
|
||||
"smartBusinessClassCode":'JXTJGYCBG', #分析报告分类编码
|
||||
"reportEmployeeCode":"E40116", # 报告人
|
||||
"reportDeptCode" :"D0044" ,# 报告部门
|
||||
"productGroupCode":"RAW_MATERIAL" # 商品分类
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
warning_data = {
|
||||
"groupNo":'000128', # 用户组编号
|
||||
"funcModule":'原油特征停更预警',
|
||||
"funcOperation":'原油特征停更预警',
|
||||
"data":{
|
||||
'WARNING_TYPE_NAME':'特征数据停更预警',
|
||||
'WARNING_CONTENT':'',
|
||||
'WARNING_DATE':''
|
||||
}
|
||||
}
|
||||
|
||||
query_data_list_item_nos_data = {
|
||||
"funcModule": "数据项",
|
||||
"funcOperation": "查询",
|
||||
"data": {
|
||||
"dateStart":"20200101",
|
||||
"dateEnd":"20241231",
|
||||
"dataItemNoList":["Brentzdj","Brentzgj"] # 数据项编码,代表 brent最低价和最高价
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# 北京环境数据库
|
||||
host = '192.168.101.27'
|
||||
port = 3306
|
||||
dbusername ='root'
|
||||
password = '123456'
|
||||
dbname = 'jingbo_test'
|
||||
table_name = 'v_tbl_crude_oil_warning'
|
||||
|
||||
|
||||
### 开关
|
||||
is_train = False # 是否训练
|
||||
is_debug = True # 是否调试
|
||||
is_eta = True # 是否使用eta接口
|
||||
is_market = False # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效
|
||||
is_timefurture = True # 是否使用时间特征
|
||||
is_fivemodels = False # 是否使用之前保存的最佳的5个模型
|
||||
is_edbcode = False # 特征使用edbcoding列表中的
|
||||
is_edbnamelist = False # 自定义特征,对应上面的edbnamelist
|
||||
is_update_eta = False # 预测结果上传到eta
|
||||
is_update_report = True # 是否上传报告
|
||||
is_update_warning_data = False # 是否上传预警数据
|
||||
is_del_corr = 0.6 # 是否删除相关性高的特征,取值为 0-1 ,0 为不删除,0.6 表示删除相关性小于0.6的特征
|
||||
is_del_tow_month = True # 是否删除两个月不更新的特征
|
||||
|
||||
|
||||
|
||||
# 连接到数据库
|
||||
db_mysql = MySQLDB(host=host, user=dbusername, password=password, database=dbname)
|
||||
db_mysql.connect()
|
||||
print("数据库连接成功",host,dbname,dbusername)
|
||||
|
||||
|
||||
# 数据截取日期
|
||||
start_year = 2020 # 数据开始年份
|
||||
end_time = '' # 数据截取日期
|
||||
freq = 'B' # 时间频率,"D": 天 "W": 周"M": 月"Q": 季度"A": 年 "H": 小时 "T": 分钟 "S": 秒 "B": 工作日
|
||||
delweekenday = True if freq == 'B' else False # 是否删除周末数据
|
||||
is_corr = False # 特征是否参与滞后领先提升相关系数
|
||||
add_kdj = False # 是否添加kdj指标
|
||||
if add_kdj and is_edbnamelist:
|
||||
edbnamelist = edbnamelist+['K','D','J']
|
||||
|
||||
### 模型参数
|
||||
y = 'AVG-金能大唐久泰青州'
|
||||
avg_cols = [
|
||||
'PP:拉丝:1102K:出厂价:青州:国家能源宁煤(日)',
|
||||
'PP:拉丝:L5E89:出厂价:华北(第二区域):内蒙古久泰新材料(日)',
|
||||
'PP:拉丝:L5E89:出厂价:河北、鲁北:大唐内蒙多伦(日)',
|
||||
'PP:拉丝:HP550J:市场价:青岛:金能化学(日)'
|
||||
]
|
||||
offsite = 80
|
||||
offsite_col = ['PP:拉丝:HP550J:市场价:青岛:金能化学(日)']
|
||||
horizon =5 # 预测的步长
|
||||
input_size = 40 # 输入序列长度
|
||||
train_steps = 50 if is_debug else 1000 # 训练步数,用来限定epoch次数
|
||||
val_check_steps = 30 # 评估频率
|
||||
early_stop_patience_steps = 5 # 早停的耐心步数
|
||||
# --- 交叉验证用的参数
|
||||
test_size = 200 # 测试集大小,定义100,后面使用的时候重新赋值
|
||||
val_size = test_size # 验证集大小,同测试集大小
|
||||
|
||||
### 特征筛选用到的参数
|
||||
k = 100 # 特征筛选数量,如果是0或者值比特征数量大,代表全部特征
|
||||
corr_threshold = 0.6 # 相关性大于0.6的特征
|
||||
rote = 0.06 # 绘图上下界阈值
|
||||
|
||||
### 计算准确率
|
||||
weight_dict = [0.4,0.15,0.1,0.1,0.25] # 权重
|
||||
|
||||
|
||||
### 文件
|
||||
data_set = '石油焦指标数据.xlsx' # 数据集文件
|
||||
dataset = 'shiyoujiaodataset' # 数据集文件夹
|
||||
|
||||
# 数据库名称
|
||||
db_name = os.path.join(dataset,'jbsh_juxiting.db')
|
||||
sqlitedb = SQLiteHandler(db_name)
|
||||
sqlitedb.connect()
|
||||
|
||||
# Run identifier built from the main hyper-parameters, the data file and the target column.
settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'

# Current date (YYYY-MM-DD); used as the default data cut-off date.
now = datetime.datetime.now().strftime('%Y-%m-%d')

# Resolve the default cut-off date BEFORE building the report name. Previously
# the name was formatted while end_time was still '' and therefore carried no date.
if end_time == '':
    end_time = now

reportname = f'PP大模型预测报告--{end_time}.pdf'  # report file name
reportname = reportname.replace(':', '-')  # colons are replaced with dashes in the file name

### Mail settings
# NOTE(review): the mailbox account and authorisation code are hard-coded here;
# consider loading them from the environment or a secrets store.
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
recv = ['liurui_test@163.com']
# Use the computed report name; the literal string 'reportname' was used before.
title = reportname
content = y + '预测报告请看附件'
file = os.path.join(dataset, reportname)
ssl = True
|
||||
|
||||
|
||||
### 日志配置
|
||||
|
||||
# 创建日志目录(如果不存在)
|
||||
log_dir = 'logs'
|
||||
if not os.path.exists(log_dir):
|
||||
os.makedirs(log_dir)
|
||||
|
||||
# 配置日志记录器
|
||||
logger = logging.getLogger('my_logger')
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
# 配置文件处理器,将日志记录到文件
|
||||
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
|
||||
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
|
||||
|
||||
# 配置控制台处理器,将日志打印到控制台
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setFormatter(logging.Formatter('%(message)s'))
|
||||
|
||||
# 将处理器添加到日志记录器
|
||||
logger.addHandler(file_handler)
|
||||
logger.addHandler(console_handler)
|
||||
|
||||
# logger.info('当前配置:'+settings)
|
||||
|
@ -143,7 +143,7 @@ modelsindex = {
|
||||
}
|
||||
|
||||
# 百川数据指标编码
|
||||
baicangidnamedict = {
|
||||
baichuanidnamedict = {
|
||||
'1588348470396480000': '石油焦滨州-友泰',
|
||||
'1588348470396480000.00': '石油焦东营-海科瑞林',
|
||||
'1588348470396480000.00': '石油焦东营-华联2',
|
||||
@ -160,6 +160,8 @@ baicangidnamedict = {
|
||||
}
|
||||
|
||||
|
||||
# baichuanidnamedict = {'1588348470396475286': 'test1', '1666': 'test2'} # 北京环境测试用
|
||||
|
||||
# eta 上传预测结果的请求体,后面发起请求的时候更改 model datalist 数据
|
||||
data = {
|
||||
"IndexCode": "",
|
||||
@ -272,14 +274,14 @@ push_data_value_list_data = {
|
||||
}
|
||||
# 八大维度数据项编码
|
||||
bdwd_items = {
|
||||
# 'ciri': 'yyycbdwdcr',
|
||||
# 'benzhou': 'yyycbdwdbz',
|
||||
# 'cizhou': 'yyycbdwdcz',
|
||||
# 'gezhou': 'yyycbdwdgz',
|
||||
# 'ciyue': 'yyycbdwdcy',
|
||||
# 'cieryue': 'yyycbdwdcey',
|
||||
# 'cisanyue': 'yyycbdwdcsy',
|
||||
# 'cisiyue': 'yyycbdwdcsiy',
|
||||
'ciri': 'syjlyycbdwdcr',
|
||||
'benzhou': 'syjlyycbdwdbz',
|
||||
'cizhou': 'syjlyycbdwdcz',
|
||||
'gezhou': 'syjlyycbdwdgz',
|
||||
'ciyue': 'syjlyycbdwdcy',
|
||||
'cieryue': 'syjlyycbdwdcey',
|
||||
'cisanyue': 'syjlyycbdwdcsy',
|
||||
'cisiyue': 'syjlyycbdwdcsiy',
|
||||
}
|
||||
|
||||
# 北京环境数据库
|
||||
@ -326,7 +328,7 @@ if add_kdj and is_edbnamelist:
|
||||
edbnamelist = edbnamelist+['K', 'D', 'J']
|
||||
|
||||
# 模型参数
|
||||
y = 'B46cc7d0a90155b5bfd'
|
||||
y = '煅烧焦山东高硫(高端S < 3.5,普货)(元/吨)'
|
||||
avg_cols = [
|
||||
|
||||
]
|
||||
|
@ -57,6 +57,7 @@ global_config = {
|
||||
'y': None, # 目标变量列名
|
||||
'is_fivemodels': None,
|
||||
'weight_dict': None,
|
||||
'baicangidnamedict': None, # 百川id名称映射
|
||||
|
||||
# 模型参数
|
||||
'data_set': None, # 数据集名称
|
||||
@ -120,6 +121,8 @@ global_config = {
|
||||
|
||||
# 数据库配置
|
||||
'sqlitedb': None,
|
||||
'db_mysql': None,
|
||||
'baichuan_table_name': None,
|
||||
}
|
||||
# 定义函数
|
||||
|
||||
@ -1199,6 +1202,8 @@ class Config:
|
||||
# 数据库配置
|
||||
# Read-only accessor: returns whatever is currently stored under
# global_config['sqlitedb'] (looked up on every access).
@property
|
||||
def sqlitedb(self): return global_config['sqlitedb']
|
||||
# Read-only accessor: returns whatever is currently stored under
# global_config['db_mysql'] (looked up on every access).
@property
|
||||
def db_mysql(self): return global_config['db_mysql']
|
||||
|
||||
|
||||
config = Config()
|
||||
@ -2213,3 +2218,38 @@ def addtimecharacteristics(df, dataset):
|
||||
df.drop(columns=['quarter_start', 'quarter'], inplace=True)
|
||||
df.to_csv(os.path.join(dataset, '指标数据添加时间特征.csv'), index=False)
|
||||
return df
|
||||
|
||||
|
||||
# 从数据库获取百川数据,接收一个百川id列表,返回df格式的数据
|
||||
def _quote_sql_literal(value):
    """Return *value* rendered as a single-quoted, escaped SQL string literal."""
    text = str(value).replace("\\", "\\\\").replace("'", "''")
    return f"'{text}'"


def get_baichuan_data(baichuanidnamedict):
    """Load Baichuan series from the database and pivot them to one column per series.

    Args:
        baichuanidnamedict: Mapping of Baichuan id -> column name to use for
            that series in the result.

    Returns:
        A DataFrame with one float column (rounded to 2 decimals) per id that
        returned rows, named after the mapping, plus a trailing ``date`` column
        formatted as ``YYYY-MM-DD``. An empty mapping yields an empty frame
        that only has the ``date`` column.

    Raises:
        KeyError: If the query returns an id that is not in the mapping.
    """
    # An empty id list would produce the invalid SQL "IN ()"; nothing to fetch.
    if not baichuanidnamedict:
        return pd.DataFrame(columns=['date'])

    # Normalise keys to str so the lookup also works when the driver returns
    # the id column as a non-str type.
    id_to_name = {str(key): name for key, name in baichuanidnamedict.items()}

    db = config.db_mysql
    db.connect()

    # Quote every id: unquoted 19-digit ids are compared numerically by MySQL
    # (as floating point), which can match the wrong rows. Expected shape:
    #   ... WHERE BAICHUAN_ID in ('1588348470396475286','1666')
    id_list = ','.join(_quote_sql_literal(item) for item in id_to_name)
    sql = (
        "SELECT BAICHUAN_ID,DATA_DATE,DATA_VALUE "
        f"FROM {global_config['baichuan_table_name']} "
        f"WHERE BAICHUAN_ID in ({id_list})"
    )

    results = db.execute_query(sql)
    df = pd.DataFrame(results, columns=['BAICHUAN_ID', 'DATA_DATE', 'DATA_VALUE'])

    # Outer-merge one series at a time on DATA_DATE so every date seen in any
    # series is kept.
    merged = pd.DataFrame(columns=['DATA_DATE'])
    for baichuan_id, group in df.groupby('BAICHUAN_ID'):
        column_name = id_to_name[str(baichuan_id)]
        # Work on a copy; mutating the groupby slice in place triggers
        # SettingWithCopy problems.
        series = group[['DATA_DATE', 'DATA_VALUE']].copy()
        series['DATA_VALUE'] = series['DATA_VALUE'].astype(float).round(2)
        series = series.rename(columns={'DATA_VALUE': column_name})
        merged = pd.merge(merged, series, on='DATA_DATE', how='outer')

    # Expose the date as a formatted string column named 'date'.
    merged['date'] = pd.to_datetime(merged['DATA_DATE']).dt.strftime('%Y-%m-%d')
    return merged.drop(columns=['DATA_DATE'])
|
||||
|
301
main_shiyoujiao.py
Normal file
301
main_shiyoujiao.py
Normal file
@ -0,0 +1,301 @@
|
||||
# 读取配置
|
||||
from lib.dataread import *
|
||||
from lib.tools import SendMail,exception_logger
|
||||
from models.nerulforcastmodels import ex_Model_Juxiting,model_losss,model_losss_juxiting,brent_export_pdf,tansuanli_export_pdf,pp_export_pdf,model_losss_juxiting
|
||||
|
||||
import glob
|
||||
import torch
|
||||
torch.set_float32_matmul_precision("high")
|
||||
|
||||
|
||||
|
||||
def predict_main():
|
||||
"""
|
||||
Run the forecasting pipeline end to end.
|
||||
|
||||
The function takes no arguments; every setting it uses (is_eta, is_market,
|
||||
data_set, dataset, y, horizon, input_size, sqlitedb, ...) is read from
|
||||
module-level names, and end_time / password are declared global.
|
||||
|
||||
Steps, in order:
|
||||
1. Build an EtaReader; when is_eta is set, fetch data through it
|
||||
(optionally adding high/low price data), write it to an xlsx file and
|
||||
process it with datachuli_juxiting; otherwise read the local file
|
||||
with getdata_juxiting.
|
||||
2. Rename the target column to 'y', optionally keep only edbnamelist,
|
||||
and write the frame to a CSV file.
|
||||
3. Store the latest (ds, y) row in the SQLite table 'trueandpredict' and
|
||||
fill rows of the 'accuracy' table whose y is null.
|
||||
4. When today is Monday: record the model that is most often the closest
|
||||
to y in table 'most_model', and append stale-feature warnings to MySQL.
|
||||
5. Call ex_Model_Juxiting, model_losss_juxiting and pp_export_pdf, then
|
||||
construct a SendMail object (the send call itself is commented out).
|
||||
|
||||
Returns:
|
||||
None. Returns early when the latest y value is not a float.
|
||||
|
||||
"""
|
||||
global end_time
|
||||
signature = BinanceAPI(APPID, SECRET)
|
||||
etadata = EtaReader(signature=signature,
|
||||
classifylisturl=classifylisturl,
|
||||
classifyidlisturl=classifyidlisturl,
|
||||
edbcodedataurl=edbcodedataurl,
|
||||
edbcodelist=edbcodelist,
|
||||
edbdatapushurl=edbdatapushurl,
|
||||
edbdeleteurl=edbdeleteurl,
|
||||
edbbusinessurl=edbbusinessurl
|
||||
)
|
||||
# Fetch data
|
||||
if is_eta:
|
||||
logger.info('从eta获取数据...')
|
||||
signature = BinanceAPI(APPID, SECRET)
|
||||
etadata = EtaReader(signature=signature,
|
||||
classifylisturl=classifylisturl,
|
||||
classifyidlisturl=classifyidlisturl,
|
||||
edbcodedataurl=edbcodedataurl,
|
||||
edbcodelist=edbcodelist,
|
||||
edbdatapushurl=edbdatapushurl,
|
||||
edbdeleteurl=edbdeleteurl,
|
||||
edbbusinessurl=edbbusinessurl,
|
||||
)
|
||||
df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_shiyoujiao_data(data_set=data_set, dataset=dataset) # raw, unprocessed data
|
||||
|
||||
if is_market:
|
||||
logger.info('从市场信息平台获取数据...')
|
||||
try:
|
||||
# For this specific server_host (the test environment), take the high/low prices from an Excel document
|
||||
if server_host == '192.168.100.53':
|
||||
logger.info('从excel文档获取最高价最低价')
|
||||
df_zhibiaoshuju = get_high_low_data(df_zhibiaoshuju)
|
||||
else:
|
||||
logger.info('从市场信息平台获取数据')
|
||||
df_zhibiaoshuju = get_market_data(end_time,df_zhibiaoshuju)
|
||||
|
||||
except :
|
||||
logger.info('最高最低价拼接失败')
|
||||
|
||||
# Save both frames as sheets of one xlsx file
|
||||
with pd.ExcelWriter(os.path.join(dataset,data_set)) as file:
|
||||
df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False)
|
||||
df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False)
|
||||
|
||||
|
||||
# Data processing
|
||||
df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture,
|
||||
end_time=end_time)
|
||||
|
||||
else:
|
||||
# Read data from the local file
|
||||
logger.info('读取本地数据:' + os.path.join(dataset, data_set))
|
||||
df,df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
|
||||
is_timefurture=is_timefurture, end_time=end_time) # raw, unprocessed data
|
||||
|
||||
# Rename the target column to 'y'
|
||||
df.rename(columns={y: 'y'}, inplace=True)
|
||||
|
||||
if is_edbnamelist:
|
||||
df = df[edbnamelist]
|
||||
df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False)
|
||||
# Save the y value of the latest date to the database
|
||||
# tail(1) selects the LAST row of df (the variable name says "first")
|
||||
first_row = df[['ds', 'y']].tail(1)
|
||||
# Skip this run when the latest y value is not a float
|
||||
if not isinstance(first_row['y'].values[0], float):
|
||||
logger.info(f'{end_time}预测目标数据为空,跳过')
|
||||
return None
|
||||
|
||||
# Store the latest true value in the database (update the row if ds already exists, otherwise insert)
|
||||
if not sqlitedb.check_table_exists('trueandpredict'):
|
||||
first_row.to_sql('trueandpredict', sqlitedb.connection, index=False)
|
||||
else:
|
||||
for row in first_row.itertuples(index=False):
|
||||
row_dict = row._asdict()
|
||||
row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S')
|
||||
check_query = sqlitedb.select_data('trueandpredict', where_condition=f"ds = '{row.ds}'")
|
||||
if len(check_query) > 0:
|
||||
set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
|
||||
sqlitedb.update_data('trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'")
|
||||
continue
|
||||
sqlitedb.insert_data('trueandpredict', tuple(row_dict.values()), columns=row_dict.keys())
|
||||
|
||||
# Fill in y for rows of the accuracy table where it is still null
|
||||
if not sqlitedb.check_table_exists('accuracy'):
|
||||
pass
|
||||
else:
|
||||
update_y = sqlitedb.select_data('accuracy',where_condition="y is null")
|
||||
if len(update_y) > 0:
|
||||
logger.info('更新accuracy表的y值')
|
||||
# Keep only the rows whose ds is not later than end_time
|
||||
update_y = update_y[update_y['ds']<=end_time]
|
||||
logger.info(f'要更新y的信息:{update_y}')
|
||||
# try:
|
||||
for row in update_y.itertuples(index=False):
|
||||
try:
|
||||
row_dict = row._asdict()
|
||||
yy = df[df['ds']==row_dict['ds']]['y'].values[0]
|
||||
# NOTE(review): these lookups require 'Brentzdj'/'Brentzgj' columns in df; if they are missing the bare except below only logs the failure - confirm this is intended for this dataset
|
||||
LOW = df[df['ds']==row_dict['ds']]['Brentzdj'].values[0]
|
||||
HIGH = df[df['ds']==row_dict['ds']]['Brentzgj'].values[0]
|
||||
sqlitedb.update_data('accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'")
|
||||
except:
|
||||
logger.info(f'更新accuracy表的y值失败:{row_dict}')
|
||||
# except Exception as e:
|
||||
# logger.info(f'更新accuracy表的y值失败:{e}')
|
||||
|
||||
import datetime
|
||||
# Check whether today is Monday (weekday() == 0); despite its name, is_weekday means "is Monday"
|
||||
is_weekday = datetime.datetime.now().weekday() == 0
|
||||
if is_weekday:
|
||||
logger.info('今天是周一,更新预测模型')
|
||||
# Over the latest 60 rows of trueandpredict, find the model that is most often the closest to y
|
||||
model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="60")
|
||||
# Drop columns with fewer than 10% non-null values
|
||||
if len(model_results) > 10:
|
||||
model_results = model_results.dropna(thresh=len(model_results)*0.1,axis=1)
|
||||
# Drop rows that contain nulls
|
||||
model_results = model_results.dropna()
|
||||
modelnames = model_results.columns.to_list()[2:-1]
|
||||
for col in model_results[modelnames].select_dtypes(include=['object']).columns:
|
||||
model_results[col] = model_results[col].astype(np.float32)
|
||||
# Absolute error rate of every model prediction relative to the true value
|
||||
for model in modelnames:
|
||||
model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
|
||||
# Smallest error rate in each row
|
||||
min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
|
||||
# Name of the column holding the smallest error rate in each row
|
||||
min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
|
||||
# Keep the text before the first '_' of that column name as the model name
|
||||
min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
|
||||
# Model name that occurs most often
|
||||
most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
|
||||
logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}")
|
||||
# Save the result to the database
|
||||
if not sqlitedb.check_table_exists('most_model'):
|
||||
sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT")
|
||||
sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
|
||||
|
||||
try:
|
||||
if is_weekday:
|
||||
# if True:
|
||||
logger.info('今天是周一,发送特征预警')
|
||||
# Upload the warning information to the database
|
||||
warning_data_df = df_zhibiaoliebiao.copy()
|
||||
warning_data_df = warning_data_df[warning_data_df['停更周期']> 3 ][['指标名称', '指标id', '频度','更新周期','指标来源','最后更新时间','停更周期']]
|
||||
# Rename the columns
|
||||
warning_data_df = warning_data_df.rename(columns={'指标名称': 'INDICATOR_NAME', '指标id': 'INDICATOR_ID', '频度': 'FREQUENCY', '更新周期': 'UPDATE_FREQUENCY', '指标来源': 'DATA_SOURCE', '最后更新时间': 'LAST_UPDATE_DATE', '停更周期': 'UPDATE_SUSPENSION_CYCLE'})
|
||||
from sqlalchemy import create_engine
|
||||
import urllib
|
||||
global password
|
||||
# URL-encode the password when it contains '@' so the connection URL below stays parseable
|
||||
if '@' in password:
|
||||
password = urllib.parse.quote_plus(password)
|
||||
|
||||
engine = create_engine(f'mysql+pymysql://{dbusername}:{password}@{host}:{port}/{dbname}')
|
||||
warning_data_df['WARNING_DATE'] = datetime.date.today().strftime("%Y-%m-%d %H:%M:%S")
|
||||
warning_data_df['TENANT_CODE'] = 'T0004'
|
||||
# Read the existing table first, then assign new ID values that continue after the current maximum
|
||||
existing_data = pd.read_sql(f"SELECT * FROM {table_name}", engine)
|
||||
if not existing_data.empty:
|
||||
max_id = existing_data['ID'].astype(int).max()
|
||||
warning_data_df['ID'] = range(max_id + 1, max_id + 1 + len(warning_data_df))
|
||||
else:
|
||||
warning_data_df['ID'] = range(1, 1 + len(warning_data_df))
|
||||
warning_data_df.to_sql(table_name, con=engine, if_exists='append', index=False)
|
||||
if is_update_warning_data:
|
||||
upload_warning_info(len(warning_data_df))
|
||||
except:
|
||||
logger.info('上传预警信息到数据库失败')
|
||||
|
||||
if is_corr:
|
||||
df = corr_feature(df=df)
|
||||
|
||||
df1 = df.copy() # backup; intended for re-adding the ds / y columns after feature selection
|
||||
logger.info(f"开始训练模型...")
|
||||
row, col = df.shape
|
||||
|
||||
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
|
||||
ex_Model_Juxiting(df,
|
||||
horizon=horizon,
|
||||
input_size=input_size,
|
||||
train_steps=train_steps,
|
||||
val_check_steps=val_check_steps,
|
||||
early_stop_patience_steps=early_stop_patience_steps,
|
||||
is_debug=is_debug,
|
||||
dataset=dataset,
|
||||
is_train=is_train,
|
||||
is_fivemodels=is_fivemodels,
|
||||
val_size=val_size,
|
||||
test_size=test_size,
|
||||
settings=settings,
|
||||
now=now,
|
||||
etadata=etadata,
|
||||
modelsindex=modelsindex,
|
||||
data=data,
|
||||
is_eta=is_eta,
|
||||
end_time=end_time,
|
||||
)
|
||||
|
||||
|
||||
logger.info('模型训练完成')
|
||||
|
||||
logger.info('训练数据绘图ing')
|
||||
model_results3 = model_losss_juxiting(sqlitedb)
|
||||
logger.info('训练数据绘图end')
|
||||
|
||||
# Model report
|
||||
logger.info('制作报告ing')
|
||||
title = f'{settings}--{end_time}-预测报告' # report title
|
||||
reportname = f'PP大模型预测报告--{end_time}.pdf' # report file name
|
||||
reportname = reportname.replace(':', '-') # replace colons
|
||||
# NOTE(review): the trailing comma after the call below turns the statement into a 1-tuple expression; the call still runs
|
||||
pp_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
|
||||
reportname=reportname,sqlitedb=sqlitedb),
|
||||
|
||||
logger.info('制作报告end')
|
||||
logger.info('模型训练完成')
|
||||
|
||||
# # LSTM univariate model
|
||||
# ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
|
||||
|
||||
# # LSTM multivariate model
|
||||
# ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset)
|
||||
|
||||
# # GRU model
|
||||
# # ex_GRU(df)
|
||||
|
||||
# Send mail: the attachment is the most recently created PDF in the dataset folder; the send call itself is commented out below
|
||||
m = SendMail(
|
||||
username=username,
|
||||
passwd=passwd,
|
||||
recv=recv,
|
||||
title=title,
|
||||
content=content,
|
||||
file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
|
||||
ssl=ssl,
|
||||
)
|
||||
# m.send_mail()
|
||||
|
||||
|
||||
# Script entry point: run a single prediction using the configured end_time.
if __name__ == '__main__':
|
||||
# Disabled backfill loop kept for reference: it re-ran predict_main once per
|
||||
# business day in a date range by overriding the global end_time.
|
||||
# global end_time
|
||||
# is_on = True
|
||||
# # iterate over the business days in the given date range
|
||||
# for i_time in pd.date_range('2025-1-20', '2025-2-6', freq='B'):
|
||||
# end_time = i_time.strftime('%Y-%m-%d')
|
||||
# try:
|
||||
# predict_main()
|
||||
# except:
|
||||
# pass
|
||||
|
||||
predict_main()
|
@ -18,6 +18,7 @@ global_config.update({
|
||||
'is_fivemodels': is_fivemodels,
|
||||
'settings': settings,
|
||||
'weight_dict': weight_dict,
|
||||
'baichuanidnamedict': baichuanidnamedict,
|
||||
|
||||
|
||||
# 模型参数
|
||||
@ -72,11 +73,14 @@ global_config.update({
|
||||
'edbdatapushurl': edbdatapushurl,
|
||||
'edbdeleteurl': edbdeleteurl,
|
||||
'edbbusinessurl': edbbusinessurl,
|
||||
'edbcodenamedict': edbcodenamedict,
|
||||
'ClassifyId': ClassifyId,
|
||||
'classifylisturl': classifylisturl,
|
||||
|
||||
# 数据库配置
|
||||
'sqlitedb': sqlitedb,
|
||||
'db_mysql': db_mysql,
|
||||
'baichuan_table_name': baichuan_table_name,
|
||||
})
|
||||
|
||||
|
||||
@ -173,6 +177,7 @@ def predict_main():
|
||||
返回:
|
||||
None
|
||||
"""
|
||||
|
||||
end_time = global_config['end_time']
|
||||
# 获取数据
|
||||
if is_eta:
|
||||
@ -206,6 +211,19 @@ def predict_main():
|
||||
except:
|
||||
logger.info('最高最低价拼接失败')
|
||||
|
||||
if len(global_config['baichuanidnamedict']) > 0:
|
||||
logger.info('从市场数据库获取百川数据...')
|
||||
baichuandf = get_baichuan_data(global_config['baichuanidnamedict'])
|
||||
df_zhibiaoshuju = pd.merge(
|
||||
df_zhibiaoshuju, baichuandf, on='date', how='outer')
|
||||
# 指标列表添加百川数据
|
||||
df_baichuanliebiao = pd.DataFrame(
|
||||
global_config['baichuanidnamedict'].items(), columns=['指标id', '指标名称'])
|
||||
df_baichuanliebiao['指标分类'] = '百川'
|
||||
df_baichuanliebiao['频度'] = '其他'
|
||||
df_zhibiaoliebiao = pd.concat(
|
||||
[df_zhibiaoliebiao, df_baichuanliebiao], axis=0)
|
||||
|
||||
# 保存到xlsx文件的sheet表
|
||||
with pd.ExcelWriter(os.path.join(dataset, data_set)) as file:
|
||||
df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False)
|
||||
|
669
test/ETA获取数据.py
669
test/ETA获取数据.py
File diff suppressed because it is too large
Load Diff
BIN
成品油eta数据.xlsx
Normal file
BIN
成品油eta数据.xlsx
Normal file
Binary file not shown.
BIN
沥青eta数据.xlsx
Normal file
BIN
沥青eta数据.xlsx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user