Compare commits
3 Commits: 5191ff7c69 ... f5286990fa

Author | SHA1 | Date
---|---|---
 | f5286990fa | 
 | 308e831468 | 
 | 7cf3cda87a | 
@ -18,22 +18,10 @@
     "import numpy as np\n",
     "# 变量定义\n",
     "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n",
-    "# query_data_list_item_nos_url\n",
-    "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\" #jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos\n",
+    "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n",
     "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n",
     "\n",
     "\n",
-    "query_data_list_item_nos_data = {\n",
-    "    \"funcModule\": \"数据项\",\n",
-    "    \"funcOperation\": \"查询\",\n",
-    "    \"data\": {\n",
-    "        \"dateStart\": \"20200101\",\n",
-    "        \"dateEnd\": \"20241231\",\n",
-    "        \"dataItemNoList\": [\"Brentzdj\", \"Brentzgj\"] # 数据项编码,代表 brent最低价和最高价\n",
-    "    }\n",
-    "}\n",
-    "\n",
-    "\n",
     "login_data = {\n",
     "    \"data\": {\n",
     "        \"account\": \"api_dev\",\n",
@ -856,7 +844,7 @@
     "    # headers1 = {\"Authorization\": token_push}\n",
     "    # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5))\n",
     "    \n",
-    "\n",
+    "    \n",
     "\n",
     "\n",
     "\n",
@ -972,6 +960,8 @@
     "    # 保存新的xls文件\n",
     "    new_workbook.save(\"定性模型数据项12-11.xlsx\")\n",
     "\n",
+    "\n",
+    "\n",
     "\n"
    ]
   },
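The hunk above moves the query_data_list_item_nos_data payload out of this notebook and drops the stale comment on search_url. For orientation, here is a minimal sketch of the login-then-query flow those variables support. The URLs and request bodies are taken from the diff; the assumption that the token comes back in the response's "data" field and is sent back in an Authorization header is ours (the commented-out headers1 line in the second hunk hints at it, but the API's response shape is not shown in the diff).

import requests

login_url = "http://10.200.32.39/jingbo-api/api/server/login"
search_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos"

login_data = {
    "data": {
        "account": "api_dev",
        "password": "<base64-encoded password>",  # placeholder; the real value is elided here
        "tenantHashCode": "<tenant hash>",        # placeholder
        "terminal": "API",
    },
    "funcModule": "API",
    "funcOperation": "获取token",
}

# Log in and pull the token out of the response (field name assumed).
token = requests.post(login_url, json=login_data, timeout=(3, 5)).json()["data"]

# Query the Brent data items by code, mirroring the payload shown in the diff.
query_body = {
    "funcModule": "数据项",
    "funcOperation": "查询",
    "data": {
        "dateStart": "20200101",
        "dateEnd": "20241231",
        "dataItemNoList": ["Brentzdj", "Brentzgj"],  # Brent low/high item codes
    },
}
items = requests.post(search_url, headers={"Authorization": token},
                      json=query_body, timeout=(3, 5)).json()
print(items)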
@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
Binary file not shown.
@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@ -1,320 +0,0 @@
import logging
import os
import logging.handlers
import datetime
from lib.tools import MySQLDB, SQLiteHandler


# eta API token
APPID = "XNLDvxZHHugj7wJ7"
SECRET = "iSeU4s6cKKBVbt94htVY1p0sqUMqb2xa"

# eta API URLs
sourcelisturl = 'http://10.189.2.78:8108/v1/edb/source/list'
classifylisturl = 'http://10.189.2.78:8108/v1/edb/classify/list?ClassifyType='
uniquecodedataurl = 'http://10.189.2.78:8108/v1/edb/data?UniqueCode=4991c37becba464609b409909fe4d992&StartDate=2024-02-01'
classifyidlisturl = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId='
edbcodedataurl = 'http://10.189.2.78:8108/v1/edb/data?EdbCode='
edbdatapushurl = 'http://10.189.2.78:8108/v1/edb/push'
edbdeleteurl = 'http://10.189.2.78:8108/v1/edb/business/edb/del'
edbbusinessurl = 'http://10.189.2.78:8108/v1/edb/business/data/del'
edbcodelist = ['ID01385938', 'lmcads03 lme comdty',
               'GC1 COMB Comdty',
               'C2404171822',
               'dxy curncy',
               'S5443199 ',
               'S5479800',
               'S5443108',
               'H7358586',
               'LC3FM1 INDEX',
               'CNY REGN Curncy',
               's0105897',
               'M0067419',
               'M0066351',
               'S0266372',
               'S0266438',
               'S0266506',
               'ID01384463']

# Temporarily hard-coded column list matching the edb codes above; to be replaced later
edbnamelist = [
    'ds', 'y',
    'LME铜价',
    '黄金连1合约',
    'Brent-WTI',
    '美元指数',
    '甲醇鲁南价格',
    '甲醇太仓港口价格',
    '山东丙烯主流价',
    '丙烷(山东)',
    'FEI丙烷 M1',
    '在岸人民币汇率',
    '南华工业品指数',
    'PVC期货主力',
    'PE期货收盘价',
    'PP连续-1月',
    'PP连续-5月',
    'PP连续-9月',
    'PP:拉丝:L5E89:出厂价:华北(第二区域):内蒙古久泰新材料(日)'
]

edbcodenamedict = {
    'ID01385938': 'PP:拉丝:1102K:市场价:青州:国家能源宁煤(日)',
    'ID01384463': 'PP:拉丝:L5E89:出厂价:华北(第二区域):内蒙古久泰新材料(日)',
    'lmcads03 lme comdty': 'LME铜价',
    'GC1 COMB Comdty': '黄金连1合约',
    'C2404171822': 'Brent-WTI',
    'dxy curncy': '美元指数',
    'S5443199 ': '甲醇鲁南价格',
    'S5479800': '甲醇太仓港口价格',
    'S5443108': '山东丙烯主流价',
    'H7358586': '丙烷(山东)',
    'LC3FM1 INDEX': 'FEI丙烷 M1',
    'CNY REGN Curncy': '在岸人民币汇率',
    's0105897': '南华工业品指数',
    'M0067419': 'PVC期货主力',
    'M0066351': 'PE期货收盘价',
    'S0266372': 'PP连续-1月',
    'S0266438': 'PP连续-5月',
    'S0266506': 'PP连续-9月',
}

# Index codes for eta's self-owned data
modelsindex = {
    'NHITS': 'SELF0000077',
    'Informer': 'SELF0000078',
    'LSTM': 'SELF0000079',
    'iTransformer': 'SELF0000080',
    'TSMixer': 'SELF0000081',
    'TSMixerx': 'SELF0000082',
    'PatchTST': 'SELF0000083',
    'RNN': 'SELF0000084',
    'GRU': 'SELF0000085',
    'TCN': 'SELF0000086',
    'BiTCN': 'SELF0000087',
    'DilatedRNN': 'SELF0000088',
    'MLP': 'SELF0000089',
    'DLinear': 'SELF0000090',
    'NLinear': 'SELF0000091',
    'TFT': 'SELF0000092',
    'FEDformer': 'SELF0000093',
    'StemGNN': 'SELF0000094',
    'MLPMultivariate': 'SELF0000095',
    'TiDE': 'SELF0000096',
    'DeepNPTS': 'SELF0000097'
}



# Request body for pushing predictions to eta; the model DataList fields are replaced before each request
data = {
    "IndexCode": "",
    "IndexName": "价格预测模型",
    "Unit": "无",
    "Frequency": "日度",
    "SourceName": f"价格预测",
    "Remark": 'ddd',
    "DataList": [
        {
            "Date": "2024-05-02",
            "Value": 333444
        }
    ]
}

# eta classification
# Data can only be fetched at level 3, so every level-3 node under 能源化工 has to be collected manually
# url = 'http://10.189.2.78:8108/v1/edb/list?ClassifyId=1214'
# ParentId: 1160 is 能源化工
# ClassifyId: 1214 is 原油, 3912 is 石油焦
# ParentId: 1214 means all data under 原油
ClassifyId = 3707



# ############################################################ Variable definitions -- test environment
server_host = '192.168.100.53'

login_pushreport_url = f"http://{server_host}:8080/jingbo-dev/api/server/login"
upload_url = f"http://{server_host}:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
upload_warning_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save"
query_data_list_item_nos_url = f"http://{server_host}:8080/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos"

login_data = {
    "data": {
        "account": "api_test",
        # "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=",  # Shihua@123456
        "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=",  # 123456
        "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
        "terminal": "API"
    },
    "funcModule": "API",
    "funcOperation": "获取token"
}

upload_data = {
    "funcModule": '研究报告信息',
    "funcOperation": '上传聚烯烃PP价格预测报告',
    "data": {
        "groupNo": '000128',  # user group number
        "ownerAccount": 'arui',  # account that owns the report
        "reportType": 'OIL_PRICE_FORECAST',  # report type, fixed as OIL_PRICE_FORECAST
        "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf',  # file name
        "fileBase64": '',  # file content, base64-encoded
        "categoryNo": 'yyjgycbg',  # research report category code
        "smartBusinessClassCode": 'JXTJGYCBG',  # analysis report category code
        "reportEmployeeCode": "E40116",  # reporter
        "reportDeptCode": "D0044",  # reporting department
        "productGroupCode": "RAW_MATERIAL"  # product category
    }
}


warning_data = {
    "groupNo": '000128',  # user group number
    "funcModule": '原油特征停更预警',
    "funcOperation": '原油特征停更预警',
    "data": {
        'WARNING_TYPE_NAME': '特征数据停更预警',
        'WARNING_CONTENT': '',
        'WARNING_DATE': ''
    }
}

query_data_list_item_nos_data = {
    "funcModule": "数据项",
    "funcOperation": "查询",
    "data": {
        "dateStart": "20200101",
        "dateEnd": "20241231",
        "dataItemNoList": ["Brentzdj", "Brentzgj"]  # data item codes for the Brent low and high prices
    }
}


# Beijing environment database
host = '192.168.101.27'
port = 3306
dbusername = 'root'
password = '123456'
dbname = 'jingbo_test'
table_name = 'v_tbl_crude_oil_warning'


### Switches
is_train = False  # whether to train
is_debug = True  # whether to run in debug mode
is_eta = True  # whether to use the eta API
is_market = False  # whether to fetch features from the market information platform; only effective when is_eta is True
is_timefurture = True  # whether to use time features
is_fivemodels = False  # whether to reuse the best 5 models saved earlier
is_edbcode = False  # use the features in the edb code list
is_edbnamelist = False  # custom features, matching edbnamelist above
is_update_eta = False  # upload predictions to eta
is_update_report = True  # whether to upload the report
is_update_warning_data = False  # whether to upload warning data
is_del_corr = 0.6  # correlation-based feature dropping, range 0-1; 0 disables it, 0.6 drops features whose correlation is below 0.6
is_del_tow_month = True  # whether to drop features not updated for two months



# Connect to the database
db_mysql = MySQLDB(host=host, user=dbusername, password=password, database=dbname)
db_mysql.connect()
print("数据库连接成功", host, dbname, dbusername)


# Data date range
start_year = 2020  # first year of data
end_time = ''  # data cutoff date
freq = 'B'  # time frequency: "D" day, "W" week, "M" month, "Q" quarter, "A" year, "H" hour, "T" minute, "S" second, "B" business day
delweekenday = True if freq == 'B' else False  # whether to drop weekend rows
is_corr = False  # whether features take part in lead/lag shifting to raise correlation
add_kdj = False  # whether to add the KDJ indicator
if add_kdj and is_edbnamelist:
    edbnamelist = edbnamelist + ['K', 'D', 'J']

### Model parameters
y = 'AVG-金能大唐久泰青州'
avg_cols = [
    'PP:拉丝:1102K:出厂价:青州:国家能源宁煤(日)',
    'PP:拉丝:L5E89:出厂价:华北(第二区域):内蒙古久泰新材料(日)',
    'PP:拉丝:L5E89:出厂价:河北、鲁北:大唐内蒙多伦(日)',
    'PP:拉丝:HP550J:市场价:青岛:金能化学(日)'
]
offsite = 80
offsite_col = ['PP:拉丝:HP550J:市场价:青岛:金能化学(日)']
horizon = 5  # forecast horizon
input_size = 40  # input sequence length
train_steps = 50 if is_debug else 1000  # training steps, used to cap the number of epochs
val_check_steps = 30  # validation frequency
early_stop_patience_steps = 5  # early-stopping patience
# --- Cross-validation parameters
test_size = 200  # test set size; reassigned later before use
val_size = test_size  # validation set size, same as the test set

### Feature-selection parameters
k = 100  # number of features to keep; 0, or a value above the feature count, means all features
corr_threshold = 0.6  # keep features whose correlation exceeds 0.6
rote = 0.06  # threshold for the plot's upper and lower bounds

### Accuracy computation
weight_dict = [0.4, 0.15, 0.1, 0.1, 0.25]  # weights


### Files
data_set = '石油焦指标数据.xlsx'  # dataset file
dataset = 'shiyoujiaodataset'  # dataset folder

# Database name
db_name = os.path.join(dataset, 'jbsh_juxiting.db')
sqlitedb = SQLiteHandler(db_name)
sqlitedb.connect()

settings = f'{input_size}-{horizon}-{train_steps}--{k}-{data_set}-{y}'
# Date and time
# now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')  # current date and time
now = datetime.datetime.now().strftime('%Y-%m-%d')  # current date
reportname = f'PP大模型预测报告--{end_time}.pdf'  # report file name
reportname = reportname.replace(':', '-')  # replace colons
if end_time == '':
    end_time = now
### Mail settings
username = '1321340118@qq.com'
passwd = 'wgczgyhtyyyyjghi'
# recv = ['liurui_test@163.com', '52585119@qq.com']
recv = ['liurui_test@163.com']
# recv = ['liurui_test@163.com']
title = 'reportname'
content = y + '预测报告请看附件'
file = os.path.join(dataset, 'reportname')
# file = os.path.join(dataset, '14-7-50--100-原油指标数据.xlsx-Brent连1合约价格--20240731175936-预测报告.pdf')
ssl = True


### Logging

# Create the log directory if it does not exist
log_dir = 'logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# Configure the logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.INFO)

# File handler: write log records to a rotating file
file_handler = logging.handlers.RotatingFileHandler(os.path.join(log_dir, 'pricepredict.log'), maxBytes=1024 * 1024, backupCount=5)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

# Console handler: print log records to the console
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(message)s'))

# Attach the handlers to the logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# logger.info('当前配置:'+settings)
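A note on the switches in the removed config above: is_del_corr only documents the correlation filter; the implementation lives elsewhere in the repo (lib.dataread). A minimal sketch of what that filter could look like, under the assumption that it compares each feature's absolute correlation with the target column y; drop_low_corr_features is a hypothetical helper, not code from this repo.

import pandas as pd

def drop_low_corr_features(df, threshold):
    # 0 disables the filter, per the comment on is_del_corr.
    if threshold == 0:
        return df
    # Absolute correlation of every numeric column with the target y.
    corr = df.corr(numeric_only=True)['y'].abs()
    # Keep ds/y plus any feature whose correlation clears the threshold.
    keep = [c for c in df.columns if c in ('ds', 'y') or corr.get(c, 0) >= threshold]
    return df[keep]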
@ -143,7 +143,7 @@ modelsindex = {
 }
 
 # Baichuan data index codes
-baichuanidnamedict = {
+baicangidnamedict = {
     '1588348470396480000': '石油焦滨州-友泰',
     '1588348470396480000.00': '石油焦东营-海科瑞林',
     '1588348470396480000.00': '石油焦东营-华联2',
@ -160,8 +160,6 @@ baichuanidnamedict = {
 }
 
-
-# baichuanidnamedict = {'1588348470396475286': 'test1', '1666': 'test2'}  # for Beijing environment testing
 
 # Request body for pushing predictions to eta; the model DataList fields are replaced before each request
 data = {
     "IndexCode": "",
@ -274,14 +272,14 @@ push_data_value_list_data = {
 }
 # Data item codes for the eight forecast dimensions
 bdwd_items = {
-    'ciri': 'syjlyycbdwdcr',
-    'benzhou': 'syjlyycbdwdbz',
-    'cizhou': 'syjlyycbdwdcz',
-    'gezhou': 'syjlyycbdwdgz',
-    'ciyue': 'syjlyycbdwdcy',
-    'cieryue': 'syjlyycbdwdcey',
-    'cisanyue': 'syjlyycbdwdcsy',
-    'cisiyue': 'syjlyycbdwdcsiy',
+    # 'ciri': 'yyycbdwdcr',
+    # 'benzhou': 'yyycbdwdbz',
+    # 'cizhou': 'yyycbdwdcz',
+    # 'gezhou': 'yyycbdwdgz',
+    # 'ciyue': 'yyycbdwdcy',
+    # 'cieryue': 'yyycbdwdcey',
+    # 'cisanyue': 'yyycbdwdcsy',
+    # 'cisiyue': 'yyycbdwdcsiy',
 }
 
 # Beijing environment database
@ -328,7 +326,7 @@ if add_kdj and is_edbnamelist:
     edbnamelist = edbnamelist+['K', 'D', 'J']
 
 # Model parameters
-y = '煅烧焦山东高硫(高端S < 3.5,普货)(元/吨)'
+y = 'B46cc7d0a90155b5bfd'
 avg_cols = [
 
 ]
@ -57,7 +57,6 @@ global_config = {
     'y': None,  # target variable column name
     'is_fivemodels': None,
     'weight_dict': None,
-    'baicangidnamedict': None,  # Baichuan id-to-name mapping
 
     # Model parameters
     'data_set': None,  # dataset name
@ -121,8 +120,6 @@ global_config = {
 
     # Database settings
     'sqlitedb': None,
-    'db_mysql': None,
-    'baichuan_table_name': None,
 }
 # Function definitions
 
@ -1202,8 +1199,6 @@ class Config:
     # Database settings
     @property
     def sqlitedb(self): return global_config['sqlitedb']
-    @property
-    def db_mysql(self): return global_config['db_mysql']
 
 
 config = Config()
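For context on the hunk above: Config exposes global_config entries as read-only properties, so call sites write config.sqlitedb rather than reaching into the dict; deleting the db_mysql property removes MySQL from that surface. A stripped-down sketch of the pattern:

global_config = {'sqlitedb': None, 'horizon': 5}

class Config:
    @property
    def sqlitedb(self):
        return global_config['sqlitedb']

    @property
    def horizon(self):
        return global_config['horizon']

config = Config()
print(config.horizon)  # later code reads config.horizon instead of a bare global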
@ -2218,38 +2213,3 @@ def addtimecharacteristics(df, dataset):
     df.drop(columns=['quarter_start', 'quarter'], inplace=True)
     df.to_csv(os.path.join(dataset, '指标数据添加时间特征.csv'), index=False)
     return df
-
-
-# Fetch Baichuan data from the database: takes a dict of Baichuan ids and returns a DataFrame
-def get_baichuan_data(baichuanidnamedict):
-    baichuanidlist = list(baichuanidnamedict.keys())
-    # Connect to the database
-    db = config.db_mysql
-    db.connect()
-    # Run the SQL query, e.g. select BAICHUAN_ID,DATA_DATE,DATA_VALUE from V_TBL_BAICHUAN_YINGFU_VALUE where BAICHUAN_ID in ('1588348470396475286','1666');
-    sql = f"SELECT BAICHUAN_ID,DATA_DATE,DATA_VALUE FROM {global_config['baichuan_table_name']} WHERE BAICHUAN_ID in ({','.join(baichuanidlist)})"
-
-    # Fetch the query results
-    results = db.execute_query(sql)
-    df = pd.DataFrame(results, columns=[
-        'BAICHUAN_ID', 'DATA_DATE', 'DATA_VALUE'])
-
-    # Group by BAICHUAN_ID, then merge on DATA_DATE
-    df1 = pd.DataFrame(columns=['DATA_DATE'])
-    for baichuan_id, group in df.groupby('BAICHUAN_ID'):
-        # Drop the BAICHUAN_ID column from the group
-        group.drop(columns=['BAICHUAN_ID'], inplace=True)
-        # Convert DATA_VALUE to float, keeping two decimals
-        group['DATA_VALUE'] = group['DATA_VALUE'].astype(float).round(2)
-        # Rename the value column to the item name
-        group.rename(
-            columns={'DATA_VALUE': baichuanidnamedict[baichuan_id]}, inplace=True)
-        # Merge on DATA_DATE
-        df1 = pd.merge(
-            df1, group[['DATA_DATE', baichuanidnamedict[baichuan_id]]], on='DATA_DATE', how='outer')
-    # Convert DATA_DATE to date format
-    df1['date'] = pd.to_datetime(
-        df1['DATA_DATE']).dt.strftime('%Y-%m-%d')
-    df1.drop(columns=['DATA_DATE'], inplace=True)
-
-    return df1
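The deleted get_baichuan_data builds its wide frame by outer-merging one renamed column per BAICHUAN_ID in a loop. The same reshaping can be written as a single pivot; this is a hedged equivalent, not code from the repo (aggfunc='last' is an assumption about how duplicate dates should collapse):

import pandas as pd

def baichuan_wide(df, idname):
    # df has columns BAICHUAN_ID, DATA_DATE, DATA_VALUE; idname maps id -> display name.
    df = df.copy()
    df['DATA_VALUE'] = df['DATA_VALUE'].astype(float).round(2)
    # One column per id, indexed by date.
    wide = df.pivot_table(index='DATA_DATE', columns='BAICHUAN_ID',
                          values='DATA_VALUE', aggfunc='last')
    wide = wide.rename(columns=idname)
    # Normalize the date index to YYYY-MM-DD strings, matching the original output.
    wide.index = pd.to_datetime(wide.index).strftime('%Y-%m-%d')
    return wide.rename_axis('date').reset_index()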
@ -1,301 +0,0 @@
# Load configuration
from lib.dataread import *
from lib.tools import SendMail, exception_logger
from models.nerulforcastmodels import ex_Model_Juxiting, model_losss, model_losss_juxiting, brent_export_pdf, tansuanli_export_pdf, pp_export_pdf, model_losss_juxiting

import glob
import torch
torch.set_float32_matmul_precision("high")


def predict_main():
    """
    Main prediction entry point: fetch data from ETA, preprocess it, train the models, and forecast.

    Parameters:
        signature (BinanceAPI): Binance API instance.
        etadata (EtaReader): ETA data reader instance.
        is_eta (bool): whether to fetch data from ETA.
        data_set (str): dataset name.
        dataset (str): dataset path.
        add_kdj (bool): whether to add the KDJ indicator.
        is_timefurture (bool): whether to add derived time features.
        end_time (str): end time.
        is_edbnamelist (bool): whether to use the EDB name list.
        edbnamelist (list): EDB name list.
        y (str): target column name.
        sqlitedb (SQLiteDB): SQLite database instance.
        is_corr (bool): whether to run correlation analysis.
        horizon (int): forecast horizon.
        input_size (int): input size.
        train_steps (int): number of training steps.
        val_check_steps (int): validation check interval.
        early_stop_patience_steps (int): early-stopping patience.
        is_debug (bool): whether to run in debug mode.
        dataset (str): dataset name.
        is_train (bool): whether to train the models.
        is_fivemodels (bool): whether to use the five saved models.
        val_size (float): validation set size.
        test_size (float): test set size.
        settings (dict): model settings.
        now (str): current time.
        etadata (EtaReader): ETA data reader instance.
        modelsindex (list): model index list.
        data (str): data type.
        is_eta (bool): whether to fetch data from ETA.

    Returns:
        None
    """
    global end_time
    signature = BinanceAPI(APPID, SECRET)
    etadata = EtaReader(signature=signature,
                        classifylisturl=classifylisturl,
                        classifyidlisturl=classifyidlisturl,
                        edbcodedataurl=edbcodedataurl,
                        edbcodelist=edbcodelist,
                        edbdatapushurl=edbdatapushurl,
                        edbdeleteurl=edbdeleteurl,
                        edbbusinessurl=edbbusinessurl
                        )
    # Fetch data
    if is_eta:
        logger.info('从eta获取数据...')
        signature = BinanceAPI(APPID, SECRET)
        etadata = EtaReader(signature=signature,
                            classifylisturl=classifylisturl,
                            classifyidlisturl=classifyidlisturl,
                            edbcodedataurl=edbcodedataurl,
                            edbcodelist=edbcodelist,
                            edbdatapushurl=edbdatapushurl,
                            edbdeleteurl=edbdeleteurl,
                            edbbusinessurl=edbbusinessurl,
                            )
        df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_shiyoujiao_data(data_set=data_set, dataset=dataset)  # raw data, unprocessed

        if is_market:
            logger.info('从市场信息平台获取数据...')
            try:
                # In the test environment, take the high/low prices from the Excel file
                if server_host == '192.168.100.53':
                    logger.info('从excel文档获取最高价最低价')
                    df_zhibiaoshuju = get_high_low_data(df_zhibiaoshuju)
                else:
                    logger.info('从市场信息平台获取数据')
                    df_zhibiaoshuju = get_market_data(end_time, df_zhibiaoshuju)
            except:
                logger.info('最高最低价拼接失败')

        # Save to sheets of the xlsx file
        with pd.ExcelWriter(os.path.join(dataset, data_set)) as file:
            df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False)
            df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False)

        # Data preprocessing
        df = datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, y=y, dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture,
                                end_time=end_time)

    else:
        # Read local data
        logger.info('读取本地数据:' + os.path.join(dataset, data_set))
        df, df_zhibiaoliebiao = getdata_juxiting(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
                                                 is_timefurture=is_timefurture, end_time=end_time)  # raw data, unprocessed

    # Rename the target column
    df.rename(columns={y: 'y'}, inplace=True)

    if is_edbnamelist:
        df = df[edbnamelist]
    df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False)
    # Save the latest y value to the database
    # Take the latest row and store it
    first_row = df[['ds', 'y']].tail(1)
    # Check that y is a float
    if not isinstance(first_row['y'].values[0], float):
        logger.info(f'{end_time}预测目标数据为空,跳过')
        return None

    # Save the latest true value to the database
    if not sqlitedb.check_table_exists('trueandpredict'):
        first_row.to_sql('trueandpredict', sqlitedb.connection, index=False)
    else:
        for row in first_row.itertuples(index=False):
            row_dict = row._asdict()
            row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S')
            check_query = sqlitedb.select_data('trueandpredict', where_condition=f"ds = '{row.ds}'")
            if len(check_query) > 0:
                set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
                sqlitedb.update_data('trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'")
                continue
            sqlitedb.insert_data('trueandpredict', tuple(row_dict.values()), columns=row_dict.keys())

    # Update the y values in the accuracy table
    if not sqlitedb.check_table_exists('accuracy'):
        pass
    else:
        update_y = sqlitedb.select_data('accuracy', where_condition="y is null")
        if len(update_y) > 0:
            logger.info('更新accuracy表的y值')
            # Rows in update_y whose ds also has a y value in df
            update_y = update_y[update_y['ds'] <= end_time]
            logger.info(f'要更新y的信息:{update_y}')
            # try:
            for row in update_y.itertuples(index=False):
                try:
                    row_dict = row._asdict()
                    yy = df[df['ds'] == row_dict['ds']]['y'].values[0]
                    LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0]
                    HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0]
                    sqlitedb.update_data('accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'")
                except:
                    logger.info(f'更新accuracy表的y值失败:{row_dict}')
            # except Exception as e:
            #     logger.info(f'更新accuracy表的y值失败:{e}')

    import datetime
    # Check whether today is Monday
    is_weekday = datetime.datetime.now().weekday() == 0
    if is_weekday:
        logger.info('今天是周一,更新预测模型')
        # Find the model with the lowest prediction residual over the last 60 days
        model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="60")
        # Drop columns that are more than 90% null
        if len(model_results) > 10:
            model_results = model_results.dropna(thresh=len(model_results)*0.1, axis=1)
        # Drop empty rows
        model_results = model_results.dropna()
        modelnames = model_results.columns.to_list()[2:-1]
        for col in model_results[modelnames].select_dtypes(include=['object']).columns:
            model_results[col] = model_results[col].astype(np.float32)
        # Compute each prediction's deviation rate from the true value
        for model in modelnames:
            model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
        # Smallest deviation rate in each row
        min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
        # Column name of the smallest deviation rate in each row
        min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
        # Map the column index back to a model name
        min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
        # Take the most frequent model name
        most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
        logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}")
        # Save the result to the database
        if not sqlitedb.check_table_exists('most_model'):
            sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT")
        sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))

    try:
        if is_weekday:
            # if True:
            logger.info('今天是周一,发送特征预警')
            # Upload the warning info to the database
            warning_data_df = df_zhibiaoliebiao.copy()
            warning_data_df = warning_data_df[warning_data_df['停更周期'] > 3][['指标名称', '指标id', '频度', '更新周期', '指标来源', '最后更新时间', '停更周期']]
            # Rename the columns
            warning_data_df = warning_data_df.rename(columns={'指标名称': 'INDICATOR_NAME', '指标id': 'INDICATOR_ID', '频度': 'FREQUENCY', '更新周期': 'UPDATE_FREQUENCY', '指标来源': 'DATA_SOURCE', '最后更新时间': 'LAST_UPDATE_DATE', '停更周期': 'UPDATE_SUSPENSION_CYCLE'})
            from sqlalchemy import create_engine
            import urllib
            global password
            if '@' in password:
                password = urllib.parse.quote_plus(password)

            engine = create_engine(f'mysql+pymysql://{dbusername}:{password}@{host}:{port}/{dbname}')
            warning_data_df['WARNING_DATE'] = datetime.date.today().strftime("%Y-%m-%d %H:%M:%S")
            warning_data_df['TENANT_CODE'] = 'T0004'
            # Query the table first, then add an ID column to the new rows
            existing_data = pd.read_sql(f"SELECT * FROM {table_name}", engine)
            if not existing_data.empty:
                max_id = existing_data['ID'].astype(int).max()
                warning_data_df['ID'] = range(max_id + 1, max_id + 1 + len(warning_data_df))
            else:
                warning_data_df['ID'] = range(1, 1 + len(warning_data_df))
            warning_data_df.to_sql(table_name, con=engine, if_exists='append', index=False)
            if is_update_warning_data:
                upload_warning_info(len(warning_data_df))
    except:
        logger.info('上传预警信息到数据库失败')

    if is_corr:
        df = corr_feature(df=df)

    df1 = df.copy()  # backup; the ds and y columns are added back after feature selection
    logger.info(f"开始训练模型...")
    row, col = df.shape

    now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    ex_Model_Juxiting(df,
                      horizon=horizon,
                      input_size=input_size,
                      train_steps=train_steps,
                      val_check_steps=val_check_steps,
                      early_stop_patience_steps=early_stop_patience_steps,
                      is_debug=is_debug,
                      dataset=dataset,
                      is_train=is_train,
                      is_fivemodels=is_fivemodels,
                      val_size=val_size,
                      test_size=test_size,
                      settings=settings,
                      now=now,
                      etadata=etadata,
                      modelsindex=modelsindex,
                      data=data,
                      is_eta=is_eta,
                      end_time=end_time,
                      )

    logger.info('模型训练完成')

    logger.info('训练数据绘图ing')
    model_results3 = model_losss_juxiting(sqlitedb)
    logger.info('训练数据绘图end')

    # Model report
    logger.info('制作报告ing')
    title = f'{settings}--{end_time}-预测报告'  # report title
    reportname = f'PP大模型预测报告--{end_time}.pdf'  # report file name
    reportname = reportname.replace(':', '-')  # replace colons
    pp_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
                  reportname=reportname, sqlitedb=sqlitedb),

    logger.info('制作报告end')
    logger.info('模型训练完成')

    # # LSTM univariate model
    # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)

    # # LSTM multivariate model
    # ex_Lstm_M(df,n_days=input_size,out_days=horizon,is_debug=is_debug,datasetpath=dataset)

    # # GRU model
    # # ex_GRU(df)

    # Send the email
    m = SendMail(
        username=username,
        passwd=passwd,
        recv=recv,
        title=title,
        content=content,
        file=max(glob.glob(os.path.join(dataset, '*.pdf')), key=os.path.getctime),
        ssl=ssl,
    )
    # m.send_mail()


if __name__ == '__main__':
    # global end_time
    # is_on = True
    # # Iterate over the business days between 2024-11-25 and 2024-12-3
    # for i_time in pd.date_range('2025-1-20', '2025-2-6', freq='B'):
    #     end_time = i_time.strftime('%Y-%m-%d')
    #     try:
    #         predict_main()
    #     except:
    #         pass

    predict_main()
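The Monday branch of the deleted script above picks a champion model: for each of the last 60 days it finds the model with the smallest absolute error rate against y, then keeps the most frequent winner. That selection condenses to a few vectorized pandas lines (a sketch, not repo code):

import pandas as pd

def most_common_best_model(results, modelnames):
    # Per-row absolute error rate of every model column against the true y.
    err = results[modelnames].sub(results['y'], axis=0).abs().div(results['y'], axis=0)
    # Winner per day, then the modal winner across days.
    return err.idxmin(axis=1).value_counts().idxmax()

# Example with two toy models:
df = pd.DataFrame({'y': [10, 20, 30],
                   'NHITS': [11, 19, 33],
                   'LSTM': [15, 26, 31]})
print(most_common_best_model(df, ['NHITS', 'LSTM']))  # NHITS wins 2 of 3 days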
@ -3,7 +3,7 @@
 from lib.dataread import *
 from config_shiyoujiao_lvyong import *
 from lib.tools import SendMail, exception_logger
-from models.nerulforcastmodels import ex_Model, model_losss, model_losss_juxiting, brent_export_pdf, tansuanli_export_pdf, pp_export_pdf, model_losss_juxiting
+from models.nerulforcastmodels import model_losss, shiyoujiao_lvyong_export_pdf
 import datetime
 import torch
 torch.set_float32_matmul_precision("high")
@ -18,7 +18,6 @@ global_config.update({
     'is_fivemodels': is_fivemodels,
     'settings': settings,
     'weight_dict': weight_dict,
-    'baichuanidnamedict': baichuanidnamedict,
 
 
     # Model parameters
@ -73,14 +72,11 @@ global_config.update({
     'edbdatapushurl': edbdatapushurl,
     'edbdeleteurl': edbdeleteurl,
     'edbbusinessurl': edbbusinessurl,
-    'edbcodenamedict': edbcodenamedict,
     'ClassifyId': ClassifyId,
     'classifylisturl': classifylisturl,
 
     # Database settings
     'sqlitedb': sqlitedb,
-    'db_mysql': db_mysql,
-    'baichuan_table_name': baichuan_table_name,
 })
 
 
@ -177,242 +173,228 @@ def predict_main():
     Returns:
         None
     """
-    end_time = global_config['end_time']
-    # Fetch data
-    if is_eta:
-        logger.info('从eta获取数据...')
-        signature = BinanceAPI(APPID, SECRET)
-        etadata = EtaReader(signature=signature,
-                            classifylisturl=global_config['classifylisturl'],
-                            classifyidlisturl=global_config['classifyidlisturl'],
-                            edbcodedataurl=global_config['edbcodedataurl'],
-                            edbcodelist=global_config['edbcodelist'],
-                            edbdatapushurl=global_config['edbdatapushurl'],
-                            edbdeleteurl=global_config['edbdeleteurl'],
-                            edbbusinessurl=global_config['edbbusinessurl'],
-                            classifyId=global_config['ClassifyId'],
-                            )
-        df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_shiyoujiao_lvyong_data(
-            data_set=data_set, dataset=dataset)  # raw data, unprocessed
-
-        if is_market:
-            logger.info('从市场信息平台获取数据...')
-            try:
-                # In the test environment, take the high/low prices from the Excel file
-                if server_host == '192.168.100.53':
-                    logger.info('从excel文档获取最高价最低价')
-                    df_zhibiaoshuju = get_high_low_data(df_zhibiaoshuju)
-                else:
-                    logger.info('从市场信息平台获取数据')
-                    df_zhibiaoshuju = get_market_data(
-                        end_time, df_zhibiaoshuju)
-            except:
-                logger.info('最高最低价拼接失败')
-
-        if len(global_config['baichuanidnamedict']) > 0:
-            logger.info('从市场数据库获取百川数据...')
-            baichuandf = get_baichuan_data(global_config['baichuanidnamedict'])
-            df_zhibiaoshuju = pd.merge(
-                df_zhibiaoshuju, baichuandf, on='date', how='outer')
-            # Append the Baichuan items to the indicator list
-            df_baichuanliebiao = pd.DataFrame(
-                global_config['baichuanidnamedict'].items(), columns=['指标id', '指标名称'])
-            df_baichuanliebiao['指标分类'] = '百川'
-            df_baichuanliebiao['频度'] = '其他'
-            df_zhibiaoliebiao = pd.concat(
-                [df_zhibiaoliebiao, df_baichuanliebiao], axis=0)
-
-        # Save to sheets of the xlsx file
-        with pd.ExcelWriter(os.path.join(dataset, data_set)) as file:
-            df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False)
-            df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False)
-
-        # Data preprocessing
-        df = datachuli(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture,
-                       end_time=end_time)
-
-    else:
-        # Read local data
-        logger.info('读取本地数据:' + os.path.join(dataset, data_set))
-        df, df_zhibiaoliebiao = getdata(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
-                                        is_timefurture=is_timefurture, end_time=end_time)  # raw data, unprocessed
-
-    # Rename the target column
-    df.rename(columns={y: 'y'}, inplace=True)
-
-    if is_edbnamelist:
-        df = df[edbnamelist]
-    df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False)
-    # Save the latest y value to the database
-    # Take the latest row and store it
-    first_row = df[['ds', 'y']].tail(1)
-    # Check that y is a float
-    if not isinstance(first_row['y'].values[0], float):
-        logger.info(f'{end_time}预测目标数据为空,跳过')
-        return None
-
-    # Save the latest true value to the database
-    if not sqlitedb.check_table_exists('trueandpredict'):
-        first_row.to_sql('trueandpredict', sqlitedb.connection, index=False)
-    else:
-        for row in first_row.itertuples(index=False):
-            row_dict = row._asdict()
-            config.logger.info(f'要保存的真实值:{row_dict}')
-            # Make sure ds is a string; convert it if not
-            if isinstance(row_dict['ds'], (pd.Timestamp, datetime.datetime)):
-                row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d')
-            elif not isinstance(row_dict['ds'], str):
-                try:
-                    row_dict['ds'] = pd.to_datetime(
-                        row_dict['ds']).strftime('%Y-%m-%d')
-                except:
-                    logger.warning(f"无法解析的时间格式: {row_dict['ds']}")
-            # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d')
-            # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S')
-            check_query = sqlitedb.select_data(
-                'trueandpredict', where_condition=f"ds = '{row.ds}'")
-            if len(check_query) > 0:
-                set_clause = ", ".join(
-                    [f"{key} = '{value}'" for key, value in row_dict.items()])
-                sqlitedb.update_data(
-                    'trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'")
-                continue
-            sqlitedb.insert_data('trueandpredict', tuple(
-                row_dict.values()), columns=row_dict.keys())
-
-    # Update the y values in the accuracy table
-    if not sqlitedb.check_table_exists('accuracy'):
-        pass
-    else:
-        update_y = sqlitedb.select_data(
-            'accuracy', where_condition="y is null")
-        if len(update_y) > 0:
-            logger.info('更新accuracy表的y值')
-            # Rows in update_y whose ds also has a y value in df
-            update_y = update_y[update_y['ds'] <= end_time]
-            logger.info(f'要更新y的信息:{update_y}')
-            # try:
-            for row in update_y.itertuples(index=False):
-                try:
-                    row_dict = row._asdict()
-                    yy = df[df['ds'] == row_dict['ds']]['y'].values[0]
-                    LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0]
-                    HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0]
-                    sqlitedb.update_data(
-                        'accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'")
-                except:
-                    logger.info(f'更新accuracy表的y值失败:{row_dict}')
-            # except Exception as e:
-            #     logger.info(f'更新accuracy表的y值失败:{e}')
-
-    # Check whether today is Monday
-    is_weekday = datetime.datetime.now().weekday() == 0
-    if is_weekday:
-        logger.info('今天是周一,更新预测模型')
-        # Find the model with the lowest prediction residual over the last 60 days
-        model_results = sqlitedb.select_data(
-            'trueandpredict', order_by="ds DESC", limit="60")
-        # Drop columns that are more than 90% null
-        if len(model_results) > 10:
-            model_results = model_results.dropna(
-                thresh=len(model_results)*0.1, axis=1)
-        # Drop empty rows
-        model_results = model_results.dropna()
-        modelnames = model_results.columns.to_list()[2:-1]
-        for col in model_results[modelnames].select_dtypes(include=['object']).columns:
-            model_results[col] = model_results[col].astype(np.float32)
-        # Compute each prediction's deviation rate from the true value
-        for model in modelnames:
-            model_results[f'{model}_abs_error_rate'] = abs(
-                model_results['y'] - model_results[model]) / model_results['y']
-        # Smallest deviation rate in each row
-        min_abs_error_rate_values = model_results.apply(
-            lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
-        # Column name of the smallest deviation rate in each row
-        min_abs_error_rate_column_name = model_results.apply(
-            lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
-        # Map the column index back to a model name
-        min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(
-            lambda x: x.split('_')[0])
-        # Take the most frequent model name
-        most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
-        logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}")
-        # Save the result to the database
-        if not sqlitedb.check_table_exists('most_model'):
-            sqlitedb.create_table(
-                'most_model', columns="ds datetime, most_common_model TEXT")
-        sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime(
-            '%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
-
-    try:
-        if is_weekday:
-            # if True:
-            logger.info('今天是周一,发送特征预警')
-            # Upload the warning info to the database
-            warning_data_df = df_zhibiaoliebiao.copy()
-            warning_data_df = warning_data_df[warning_data_df['停更周期'] > 3][[
-                '指标名称', '指标id', '频度', '更新周期', '指标来源', '最后更新时间', '停更周期']]
-            # Rename the columns
-            warning_data_df = warning_data_df.rename(columns={'指标名称': 'INDICATOR_NAME', '指标id': 'INDICATOR_ID', '频度': 'FREQUENCY',
-                                                              '更新周期': 'UPDATE_FREQUENCY', '指标来源': 'DATA_SOURCE', '最后更新时间': 'LAST_UPDATE_DATE', '停更周期': 'UPDATE_SUSPENSION_CYCLE'})
-            from sqlalchemy import create_engine
-            import urllib
-            global password
-            if '@' in password:
-                password = urllib.parse.quote_plus(password)
-
-            engine = create_engine(
-                f'mysql+pymysql://{dbusername}:{password}@{host}:{port}/{dbname}')
-            warning_data_df['WARNING_DATE'] = datetime.date.today().strftime(
-                "%Y-%m-%d %H:%M:%S")
-            warning_data_df['TENANT_CODE'] = 'T0004'
-            # Query the table first, then add an ID column to the new rows
-            existing_data = pd.read_sql(f"SELECT * FROM {table_name}", engine)
-            if not existing_data.empty:
-                max_id = existing_data['ID'].astype(int).max()
-                warning_data_df['ID'] = range(
-                    max_id + 1, max_id + 1 + len(warning_data_df))
-            else:
-                warning_data_df['ID'] = range(1, 1 + len(warning_data_df))
-            warning_data_df.to_sql(
-                table_name, con=engine, if_exists='append', index=False)
-            if is_update_warning_data:
-                upload_warning_info(len(warning_data_df))
-    except:
-        logger.info('上传预警信息到数据库失败')
-
-    if is_corr:
-        df = corr_feature(df=df)
-
-    df1 = df.copy()  # backup; the ds and y columns are added back after feature selection
-    logger.info(f"开始训练模型...")
-    row, col = df.shape
-
-    now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
-    ex_Model(df,
-             horizon=global_config['horizon'],
-             input_size=global_config['input_size'],
-             train_steps=global_config['train_steps'],
-             val_check_steps=global_config['val_check_steps'],
-             early_stop_patience_steps=global_config['early_stop_patience_steps'],
-             is_debug=global_config['is_debug'],
-             dataset=global_config['dataset'],
-             is_train=global_config['is_train'],
-             is_fivemodels=global_config['is_fivemodels'],
-             val_size=global_config['val_size'],
-             test_size=global_config['test_size'],
-             settings=global_config['settings'],
-             now=now,
-             etadata=global_config['etadata'],
-             modelsindex=global_config['modelsindex'],
-             data=data,
-             is_eta=global_config['is_eta'],
-             end_time=global_config['end_time'],
-             )
-
-    logger.info('模型训练完成')
+    # end_time = global_config['end_time']
+    # # Fetch data
+    # if is_eta:
+    #     logger.info('从eta获取数据...')
+    #     signature = BinanceAPI(APPID, SECRET)
+    #     etadata = EtaReader(signature=signature,
+    #                         classifylisturl=global_config['classifylisturl'],
+    #                         classifyidlisturl=global_config['classifyidlisturl'],
+    #                         edbcodedataurl=global_config['edbcodedataurl'],
+    #                         edbcodelist=global_config['edbcodelist'],
+    #                         edbdatapushurl=global_config['edbdatapushurl'],
+    #                         edbdeleteurl=global_config['edbdeleteurl'],
+    #                         edbbusinessurl=global_config['edbbusinessurl'],
+    #                         classifyId=global_config['ClassifyId'],
+    #                         )
+    #     df_zhibiaoshuju, df_zhibiaoliebiao = etadata.get_eta_api_shiyoujiao_lvyong_data(
+    #         data_set=data_set, dataset=dataset)  # raw data, unprocessed
+
+    # if is_market:
+    #     logger.info('从市场信息平台获取数据...')
+    #     try:
+    #         # In the test environment, take the high/low prices from the Excel file
+    #         if server_host == '192.168.100.53':
+    #             logger.info('从excel文档获取最高价最低价')
+    #             df_zhibiaoshuju = get_high_low_data(df_zhibiaoshuju)
+    #         else:
+    #             logger.info('从市场信息平台获取数据')
+    #             df_zhibiaoshuju = get_market_data(
+    #                 end_time, df_zhibiaoshuju)
+    #     except:
+    #         logger.info('最高最低价拼接失败')
+
+    # # Save to sheets of the xlsx file
+    # with pd.ExcelWriter(os.path.join(dataset, data_set)) as file:
+    #     df_zhibiaoshuju.to_excel(file, sheet_name='指标数据', index=False)
+    #     df_zhibiaoliebiao.to_excel(file, sheet_name='指标列表', index=False)
+
+    # # Data preprocessing
+    # df = datachuli(df_zhibiaoshuju, df_zhibiaoliebiao, y=global_config['y'], dataset=dataset, add_kdj=add_kdj, is_timefurture=is_timefurture,
+    #                end_time=end_time)
+
+    # else:
+    #     # Read local data
+    #     logger.info('读取本地数据:' + os.path.join(dataset, data_set))
+    #     df, df_zhibiaoliebiao = getdata(filename=os.path.join(dataset, data_set), y=y, dataset=dataset, add_kdj=add_kdj,
+    #                                     is_timefurture=is_timefurture, end_time=end_time)  # raw data, unprocessed

+    # # Rename the target column
+    # df.rename(columns={y: 'y'}, inplace=True)
+
+    # if is_edbnamelist:
+    #     df = df[edbnamelist]
+    # df.to_csv(os.path.join(dataset, '指标数据.csv'), index=False)
+    # # Save the latest y value to the database
+    # # Take the latest row and store it
+    # first_row = df[['ds', 'y']].tail(1)
+    # # Check that y is a float
+    # if not isinstance(first_row['y'].values[0], float):
+    #     logger.info(f'{end_time}预测目标数据为空,跳过')
+    #     return None
+
+    # # Save the latest true value to the database
+    # if not sqlitedb.check_table_exists('trueandpredict'):
+    #     first_row.to_sql('trueandpredict', sqlitedb.connection, index=False)
+    # else:
+    #     for row in first_row.itertuples(index=False):
+    #         row_dict = row._asdict()
+    #         config.logger.info(f'要保存的真实值:{row_dict}')
+    #         # Make sure ds is a string; convert it if not
+    #         if isinstance(row_dict['ds'], (pd.Timestamp, datetime.datetime)):
+    #             row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d')
+    #         elif not isinstance(row_dict['ds'], str):
+    #             try:
+    #                 row_dict['ds'] = pd.to_datetime(
+    #                     row_dict['ds']).strftime('%Y-%m-%d')
+    #             except:
+    #                 logger.warning(f"无法解析的时间格式: {row_dict['ds']}")
+    #         # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d')
+    #         # row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S')
+    #         check_query = sqlitedb.select_data(
+    #             'trueandpredict', where_condition=f"ds = '{row.ds}'")
+    #         if len(check_query) > 0:
+    #             set_clause = ", ".join(
+    #                 [f"{key} = '{value}'" for key, value in row_dict.items()])
+    #             sqlitedb.update_data(
+    #                 'trueandpredict', set_clause, where_condition=f"ds = '{row.ds}'")
+    #             continue
+    #         sqlitedb.insert_data('trueandpredict', tuple(
+    #             row_dict.values()), columns=row_dict.keys())
+
+    # # Update the y values in the accuracy table
+    # if not sqlitedb.check_table_exists('accuracy'):
+    #     pass
+    # else:
+    #     update_y = sqlitedb.select_data(
+    #         'accuracy', where_condition="y is null")
+    #     if len(update_y) > 0:
+    #         logger.info('更新accuracy表的y值')
+    #         # Rows in update_y whose ds also has a y value in df
+    #         update_y = update_y[update_y['ds'] <= end_time]
+    #         logger.info(f'要更新y的信息:{update_y}')
+    #         # try:
+    #         for row in update_y.itertuples(index=False):
+    #             try:
+    #                 row_dict = row._asdict()
+    #                 yy = df[df['ds'] == row_dict['ds']]['y'].values[0]
+    #                 LOW = df[df['ds'] == row_dict['ds']]['Brentzdj'].values[0]
+    #                 HIGH = df[df['ds'] == row_dict['ds']]['Brentzgj'].values[0]
+    #                 sqlitedb.update_data(
+    #                     'accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'")
+    #             except:
+    #                 logger.info(f'更新accuracy表的y值失败:{row_dict}')
+    #         # except Exception as e:
+    #         #     logger.info(f'更新accuracy表的y值失败:{e}')
+
+    # # Check whether today is Monday
+    # is_weekday = datetime.datetime.now().weekday() == 0
+    # if is_weekday:
+    #     logger.info('今天是周一,更新预测模型')
+    #     # Find the model with the lowest prediction residual over the last 60 days
+    #     model_results = sqlitedb.select_data(
+    #         'trueandpredict', order_by="ds DESC", limit="60")
+    #     # Drop columns that are more than 90% null
+    #     if len(model_results) > 10:
+    #         model_results = model_results.dropna(
+    #             thresh=len(model_results)*0.1, axis=1)
+    #     # Drop empty rows
+    #     model_results = model_results.dropna()
+    #     modelnames = model_results.columns.to_list()[2:-1]
+    #     for col in model_results[modelnames].select_dtypes(include=['object']).columns:
+    #         model_results[col] = model_results[col].astype(np.float32)
+    #     # Compute each prediction's deviation rate from the true value
+    #     for model in modelnames:
+    #         model_results[f'{model}_abs_error_rate'] = abs(
+    #             model_results['y'] - model_results[model]) / model_results['y']
+    #     # Smallest deviation rate in each row
+    #     min_abs_error_rate_values = model_results.apply(
+    #         lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
+    #     # Column name of the smallest deviation rate in each row
+    #     min_abs_error_rate_column_name = model_results.apply(
+    #         lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
+    #     # Map the column index back to a model name
+    #     min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(
+    #         lambda x: x.split('_')[0])
+    #     # Take the most frequent model name
+    #     most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
+    #     logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}")
+    #     # Save the result to the database
+    #     if not sqlitedb.check_table_exists('most_model'):
+    #         sqlitedb.create_table(
+    #             'most_model', columns="ds datetime, most_common_model TEXT")
+    #     sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime(
+    #         '%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
+
+    # try:
+    #     if is_weekday:
+    #         # if True:
+    #         logger.info('今天是周一,发送特征预警')
+    #         # Upload the warning info to the database
+    #         warning_data_df = df_zhibiaoliebiao.copy()
+    #         warning_data_df = warning_data_df[warning_data_df['停更周期'] > 3][[
+    #             '指标名称', '指标id', '频度', '更新周期', '指标来源', '最后更新时间', '停更周期']]
+    #         # Rename the columns
+    #         warning_data_df = warning_data_df.rename(columns={'指标名称': 'INDICATOR_NAME', '指标id': 'INDICATOR_ID', '频度': 'FREQUENCY',
+    #                                                           '更新周期': 'UPDATE_FREQUENCY', '指标来源': 'DATA_SOURCE', '最后更新时间': 'LAST_UPDATE_DATE', '停更周期': 'UPDATE_SUSPENSION_CYCLE'})
+    #         from sqlalchemy import create_engine
+    #         import urllib
+    #         global password
+    #         if '@' in password:
+    #             password = urllib.parse.quote_plus(password)
+
+    #         engine = create_engine(
+    #             f'mysql+pymysql://{dbusername}:{password}@{host}:{port}/{dbname}')
+    #         warning_data_df['WARNING_DATE'] = datetime.date.today().strftime(
+    #             "%Y-%m-%d %H:%M:%S")
+    #         warning_data_df['TENANT_CODE'] = 'T0004'
+    #         # Query the table first, then add an ID column to the new rows
+    #         existing_data = pd.read_sql(f"SELECT * FROM {table_name}", engine)
+    #         if not existing_data.empty:
+    #             max_id = existing_data['ID'].astype(int).max()
+    #             warning_data_df['ID'] = range(
+    #                 max_id + 1, max_id + 1 + len(warning_data_df))
+    #         else:
+    #             warning_data_df['ID'] = range(1, 1 + len(warning_data_df))
+    #         warning_data_df.to_sql(
+    #             table_name, con=engine, if_exists='append', index=False)
+    #         if is_update_warning_data:
+    #             upload_warning_info(len(warning_data_df))
+    # except:
+    #     logger.info('上传预警信息到数据库失败')
+
+    # if is_corr:
+    #     df = corr_feature(df=df)
+
+    # df1 = df.copy()  # backup; the ds and y columns are added back after feature selection
+    # logger.info(f"开始训练模型...")
+    # row, col = df.shape
+
+    # now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
+    # ex_Model(df,
+    #          horizon=global_config['horizon'],
+    #          input_size=global_config['input_size'],
+    #          train_steps=global_config['train_steps'],
+    #          val_check_steps=global_config['val_check_steps'],
+    #          early_stop_patience_steps=global_config['early_stop_patience_steps'],
+    #          is_debug=global_config['is_debug'],
+    #          dataset=global_config['dataset'],
+    #          is_train=global_config['is_train'],
+    #          is_fivemodels=global_config['is_fivemodels'],
+    #          val_size=global_config['val_size'],
+    #          test_size=global_config['test_size'],
+    #          settings=global_config['settings'],
+    #          now=now,
+    #          etadata=global_config['etadata'],
+    #          modelsindex=global_config['modelsindex'],
+    #          data=data,
+    #          is_eta=global_config['is_eta'],
+    #          end_time=global_config['end_time'],
+    #          )
+
+    # logger.info('模型训练完成')
 
     logger.info('训练数据绘图ing')
     model_results3 = model_losss(sqlitedb, end_time=end_time)
@ -421,15 +403,15 @@ def predict_main():
     # Model report
     logger.info('制作报告ing')
     title = f'{settings}--{end_time}-预测报告'  # report title
-    reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # report file name
+    reportname = f'石油焦铝用大模型日度预测--{end_time}.pdf'  # report file name
     reportname = reportname.replace(':', '-')  # replace colons
-    brent_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
+    shiyoujiao_lvyong_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
                      reportname=reportname, sqlitedb=sqlitedb),
 
     logger.info('制作报告end')
     logger.info('模型训练完成')
 
-    push_market_value()
+    # push_market_value()
 
     # # LSTM univariate model
     # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
@ -301,7 +301,7 @@ def predict_main():
             thresh=len(model_results)*0.1, axis=1)
         # Drop empty rows
         model_results = model_results.dropna()
-        modelnames = model_results.columns.to_list()[2:-1]
+        modelnames = model_results.columns.to_list()[2:-2]
         for col in model_results[modelnames].select_dtypes(include=['object']).columns:
             model_results[col] = model_results[col].astype(np.float32)
         # Compute each prediction's deviation rate from the true value
@ -866,7 +866,7 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
         plt.text(i, j, str(j), ha='center', va='bottom')
 
         # Vertical dashed line at the current date
-        plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')
+        plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
         plt.legend()
         plt.xlabel('日期')
         plt.ylabel('价格')
@ -881,8 +881,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
         ax.axis('off')  # hide the axes
         # Round values to 2 decimals
         df = df.round(2)
-        df = df[-horizon:]
-        df['Day'] = [f'Day_{i}' for i in range(1, horizon+1)]
+        df = df[-config.horizon:]
+        df['Day'] = [f'Day_{i}' for i in range(1, config.horizon+1)]
         # Move the Day column to the front
         df = df[['Day'] + list(df.columns[:-1])]
         table = ax.table(cellText=df.values,
@ -1297,7 +1297,7 @@ def model_losss(sqlitedb, end_time):
         # plt.plot(df['ds'], df[model], label=model,marker='o')
         plt.plot(df['ds'], df[most_model_name], label=model, marker='o')
         # Vertical dashed line at the current date
-        plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')
+        plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
         plt.legend()
         plt.xlabel('日期')
         # Format the x-axis dates as YYYY-MM-DD
@@ -1338,7 +1338,7 @@ def model_losss(sqlitedb, end_time):
                 plt.text(i, j, str(j), ha='center', va='bottom')

         # 当前日期画竖虚线
-        plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')
+        plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
         plt.legend()
         plt.xlabel('日期')
         # 自动设置横轴日期显示
@@ -1357,8 +1357,8 @@ def model_losss(sqlitedb, end_time):
         ax.axis('off')  # 关闭坐标轴
         # 数值保留2位小数
         df = df.round(2)
-        df = df[-horizon:]
-        df['Day'] = [f'Day_{i}' for i in range(1, horizon+1)]
+        df = df[-config.horizon:]
+        df['Day'] = [f'Day_{i}' for i in range(1, config.horizon+1)]
         # Day列放到最前面
         df = df[['Day'] + list(df.columns[:-1])]
         table = ax.table(cellText=df.values,
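The table hunks prepare the last `horizon` rows for rendering: round to two decimals, keep only the forecast window, label the rows Day_1..Day_n, and move the label column to the front. A compact reproduction with made-up model columns:

import pandas as pd

horizon = 3  # illustrative; the code above takes this from config.horizon
df = pd.DataFrame({'NHITS': [70.123, 71.456, 72.789, 73.012],
                   'TFT':   [69.9, 70.1, 71.2, 72.3]})

df = df.round(2)                                   # two decimals for display
df = df[-horizon:]                                 # forecast window only
df['Day'] = [f'Day_{i}' for i in range(1, horizon + 1)]
df = df[['Day'] + list(df.columns[:-1])]           # 'Day' column first
print(df)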
@@ -1388,10 +1388,10 @@ def model_losss(sqlitedb, end_time):
                     bbox_inches='tight')
         plt.close()

-    # _plt_predict_ture(df_combined3)
+    _plt_predict_ture(df_combined3)
     # _plt_modeltopten_predict_ture(df_combined4)
-    # _plt_predict_table(df_combined3)
-    # _plt_model_results3()
+    _plt_predict_table(df_combined3)
+    _plt_model_results3()

     return model_results3
@@ -2461,6 +2461,319 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, inputsize=5, dataset='dataset', time='2024-07-30', reportname='report.pdf', sqlitedb='jbsh_yuanyou.db'):
         print(f"请求超时: {e}")


+@exception_logger
+def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, inputsize=5, dataset='dataset', time='2024-07-30', reportname='report.pdf', sqlitedb='jbsh_yuanyou.db'):
+    global y
+    # 创建内容对应的空列表
+    content = list()
+    # 获取特征的近一月值
+    import pandas as pd
+    feature_data_df = pd.read_csv(os.path.join(
+        config.dataset, '指标数据添加时间特征.csv'), parse_dates=['ds']).tail(60)
+
+    def draw_feature_trend(feature_data_df, features):
+        # 画特征近60天的趋势图
+        feature_df = feature_data_df[['ds', 'y']+features]
+        # 遍历X每一列,和y画散点图
+        for i, col in enumerate(features):
+            # try:
+            print(f'正在绘制第{i+1}个特征{col}与价格散点图...')
+            if col not in ['ds', 'y']:
+                fig, ax1 = plt.subplots(figsize=(10, 6))
+                # 在第一个坐标轴上绘制数据
+                sns.lineplot(data=feature_df, x='ds', y='y', ax=ax1, color='b')
+                ax1.set_xlabel('日期')
+                ax1.set_ylabel('y', color='b')
+                ax1.tick_params('y', colors='b')
+                # 在 ax1 上添加文本显示值,添加一定的偏移避免值与曲线重叠
+                for j in range(1, len(feature_df), 2):
+                    value = feature_df['y'].iloc[j]
+                    date = feature_df['ds'].iloc[j]
+                    offset = 1.001
+                    ax1.text(date, value * offset, str(round(value, 2)),
+                             ha='center', va='bottom', color='b', fontsize=10)
+                # 创建第二个坐标轴
+                ax2 = ax1.twinx()
+                # 在第二个坐标轴上绘制数据
+                sns.lineplot(data=feature_df, x='ds', y=col, ax=ax2, color='r')
+                ax2.set_ylabel(col, color='r')
+                ax2.tick_params('y', colors='r')
+                # 在 ax2 上添加文本显示值,添加一定的偏移避免值与曲线重叠
+                for j in range(0, len(feature_df), 2):
+                    value = feature_df[col].iloc[j]
+                    date = feature_df['ds'].iloc[j]
+                    offset = 1.0003
+                    ax2.text(date, value * offset, str(round(value, 2)),
+                             ha='center', va='bottom', color='r', fontsize=10)
+                # 添加标题
+                plt.title(col)
+                # 设置横坐标为日期格式并自动调整
+                locator = mdates.AutoDateLocator()
+                formatter = mdates.AutoDateFormatter(locator)
+                ax1.xaxis.set_major_locator(locator)
+                ax1.xaxis.set_major_formatter(formatter)
+                # 文件名特殊字符处理
+                col = col.replace('*', '-')
+                col = col.replace(':', '-')
+                col = col.replace(r'/', '-')
+                plt.savefig(os.path.join(config.dataset, f'{col}与价格散点图.png'))
+                content.append(Graphs.draw_img(
+                    os.path.join(config.dataset, f'{col}与价格散点图.png')))
+                plt.close()
+            # except Exception as e:
+            #     print(f'绘制第{i+1}个特征{col}与价格散点图时出错:{e}')
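The draw_feature_trend helper above overlays each indicator on the price series with ax1.twinx(), which shares the date axis while giving the indicator its own y scale. A stripped-down version of the same pattern on synthetic data:

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

df = pd.DataFrame({'ds': pd.date_range('2025-01-01', periods=60),
                   'y': np.linspace(70, 80, 60),            # price series
                   'feature': np.linspace(1.0, 2.0, 60)})   # any indicator

fig, ax1 = plt.subplots(figsize=(10, 6))
sns.lineplot(data=df, x='ds', y='y', ax=ax1, color='b')     # price, left axis
ax2 = ax1.twinx()                                           # shared x, own y
sns.lineplot(data=df, x='ds', y='feature', ax=ax2, color='r')
locator = mdates.AutoDateLocator()                          # tidy date ticks
ax1.xaxis.set_major_locator(locator)
ax1.xaxis.set_major_formatter(mdates.AutoDateFormatter(locator))
plt.close(fig)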
+    # 添加标题
+    content.append(Graphs.draw_title(f'{config.y}{time}预测报告'))
+
+    # 预测结果
+    content.append(Graphs.draw_little_title('一、预测结果:'))
+    # 添加历史走势及预测价格的走势图片
+    content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
+    # 波动率画图逻辑
+    content.append(Graphs.draw_text('图示说明:'))
+    content.append(Graphs.draw_text(
+        '    确定置信区间:设置残差置信阈值,以每周最佳模型为基准,选取在置信区间的预测值作为置信区间;'))
+
+    # 取df中y列为空的行
+    import pandas as pd
+    df = pd.read_csv(os.path.join(config.dataset, 'predict.csv'), encoding='gbk')
+    df_true = pd.read_csv(os.path.join(
+        config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8')  # 获取预测日期对应的真实值
+    df_true = df_true[['ds', 'y']]
+    eval_df = pd.read_csv(os.path.join(
+        config.dataset, 'model_evaluation.csv'), encoding='utf-8')
+    # 按评估指标排序,取前五
+    fivemodels_list = eval_df['模型(Model)'].values  # 列表形式,后面当作列名索引使用
+    # 取 fivemodels_list 和 ds 列
+    df = df[['ds'] + fivemodels_list.tolist()]
+    # 拼接预测日期对应的真实值
+    df = pd.merge(df, df_true, on='ds', how='left')
+    # 删除全部为nan的列
+    df = df.dropna(how='all', axis=1)
+    # 选择除 'ds' 列外的数值列,并进行类型转换和四舍五入
+    num_cols = [col for col in df.columns if col !=
+                'ds' and pd.api.types.is_numeric_dtype(df[col])]
+    for col in num_cols:
+        df[col] = df[col].astype(float).round(2)
+    # 添加最大值、最小值、平均值三列
+    df['平均值'] = df[num_cols].mean(axis=1).round(2)
+    df['最大值'] = df[num_cols].max(axis=1)
+    df['最小值'] = df[num_cols].min(axis=1)
+    # df转置
+    df = df.T
+    # df重置索引
+    df = df.reset_index()
+    # 添加预测值表格
+    data = df.values.tolist()
+    col_width = 500/len(df.columns)
+    content.append(Graphs.draw_table(col_width, *data))
+    content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
+    df = pd.read_csv(os.path.join(
+        config.dataset, 'testandpredict_groupby.csv'), encoding='utf-8')
+    df4 = df.copy()  # 计算偏差率使用
+    # 去掉created_dt 列
+    df4 = df4.drop(columns=['created_dt'])
+    # 计算模型偏差率
+    # 计算各列对于y列的差值百分比
+    df3 = pd.DataFrame()  # 存储偏差率
+
+    # 删除有null的行
+    df4 = df4.dropna()
+    df3['ds'] = df4['ds']
+    for col in fivemodels_list:
+        df3[col] = round(abs(df4[col] - df4['y']) / df4['y'] * 100, 2)
+    # 找出决定系数前五的偏差率
+    df3 = df3[['ds']+fivemodels_list.tolist()][-inputsize:]
+    # 找出上一预测区间的时间
+    stime = df3['ds'].iloc[0]
+    etime = df3['ds'].iloc[-1]
+    # 添加偏差率表格
+    fivemodels = '、'.join(eval_df['模型(Model)'].values[:5])  # 字符串形式,后面写入字符串使用
+    content.append(Graphs.draw_text(
+        f'预测使用了{num_models}个模型进行训练,使用评估结果MAE前五的模型分别是 {fivemodels} ,模型上一预测区间 {stime} -- {etime}的偏差率(%)分别是:'))
+    # # 添加偏差率表格
+    df3 = df3.T
+    df3 = df3.reset_index()
+    data = df3.values.tolist()
+    col_width = 500/len(df3.columns)
+    content.append(Graphs.draw_table(col_width, *data))
+
+    content.append(Graphs.draw_little_title('上一周预测准确率:'))
+    df4 = sqlitedb.select_data('accuracy_rote', order_by='结束日期 desc', limit=1)
+    df4 = df4.T
+    df4 = df4.reset_index()
+    df4 = df4.T
+    data = df4.values.tolist()
+    col_width = 500/len(df4.columns)
+    content.append(Graphs.draw_table(col_width, *data))
+
+    content.append(Graphs.draw_little_title('三、预测过程解析:'))
+    # 特征、模型、参数配置
+    content.append(Graphs.draw_little_title('模型选择:'))
+    content.append(Graphs.draw_text(
+        f'本次预测使用了一个专门收集时间序列的NeuralForecast库中的{num_models}个模型:'))
+    content.append(Graphs.draw_text(f'使用40天的数据预测未来{inputsize}天的数据。'))
+    content.append(Graphs.draw_little_title('指标情况:'))
+    with open(os.path.join(config.dataset, '特征频度统计.txt'), encoding='utf-8') as f:
+        for line in f.readlines():
+            content.append(Graphs.draw_text(line))
+
+    data = pd.read_csv(os.path.join(config.dataset, '指标数据添加时间特征.csv'),
+                       encoding='utf-8')  # 计算相关系数用
+    df_zhibiaofenlei = loadcsv(os.path.join(
+        config.dataset, '特征处理后的指标名称及分类.csv'))  # 气泡图用
+    df_zhibiaoshuju = data.copy()  # 气泡图用
+
+    # 绘制特征相关气泡图
+    grouped = df_zhibiaofenlei.groupby('指标分类')
+    grouped_corr = pd.DataFrame(columns=['指标分类', '指标数量', '相关性总和'])
+
+    content.append(Graphs.draw_little_title('按指标分类分别与预测目标进行皮尔逊相关系数分析:'))
+    content.append(Graphs.draw_text('''皮尔逊相关系数说明:'''))
+    content.append(Graphs.draw_text('''衡量两个特征之间的线性相关性。'''))
+    content.append(Graphs.draw_text('''
+    相关系数为1:表示两个变量之间存在完全正向的线性关系,即当一个变量增加时,另一个变量也相应增加,且变化是完全一致的。'''))
+    content.append(Graphs.draw_text(
+        '''相关系数为-1:表示两个变量之间存在完全负向的线性关系,即当一个变量增加时,另一个变量会相应减少,且变化是完全相反的'''))
+    content.append(Graphs.draw_text(
+        '''相关系数接近0:表示两个变量之间不存在线性关系,即它们的变化不会随着对方的变化而变化。'''))
+    for name, group in grouped:
+        cols = group['指标名称'].tolist()
+        config.logger.info(f'开始绘制{name}类指标的相关性直方图')
+        cols_subset = cols
+        feature_names = ['y'] + cols_subset
+        correlation_matrix = df_zhibiaoshuju[feature_names].corr()['y']
+
+        # 绘制特征相关性直方分布图
+        plt.figure(figsize=(10, 8))
+        sns.histplot(correlation_matrix.values.flatten(),
+                     bins=20, kde=True, color='skyblue')
+        plt.title(f'{name}类指标(共{len(cols_subset)}个)相关性直方分布图')
+        plt.xlabel('相关系数')
+        plt.ylabel('频数')
+        plt.savefig(os.path.join(
+            config.dataset, f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
+        plt.close()
+        content.append(Graphs.draw_img(
+            os.path.join(config.dataset, f'{name}类指标相关性直方分布图.png')))
+        content.append(Graphs.draw_text(
+            f'{name}类指标(共{len(cols_subset)}个)的相关性直方分布图如上所示。'))
+        # 相关性大于0的特征
+        positive_corr_features = correlation_matrix[correlation_matrix > 0].sort_values(
+            ascending=False).index.tolist()[1:]
+
+        print(f'{name}下正相关的特征值有:', positive_corr_features)
+        if len(positive_corr_features) > 5:
+            positive_corr_features = positive_corr_features[0:5]
+            content.append(Graphs.draw_text(
+                f'{name}类指标中,与预测目标y正相关前五的特征有:{positive_corr_features}'))
+            draw_feature_trend(feature_data_df, positive_corr_features)
+        elif len(positive_corr_features) == 0:
+            pass
+        else:
+            positive_corr_features = positive_corr_features
+            content.append(Graphs.draw_text(
+                f'其中,与预测目标y正相关的特征有:{positive_corr_features}'))
+            draw_feature_trend(feature_data_df, positive_corr_features)
+
+        # 相关性小于0的特征
+        negative_corr_features = correlation_matrix[correlation_matrix < 0].sort_values(
+            ascending=True).index.tolist()
+
+        print(f'{name}下负相关的特征值有:', negative_corr_features)
+        if len(negative_corr_features) > 5:
+            negative_corr_features = negative_corr_features[:5]
+            content.append(Graphs.draw_text(
+                f'与预测目标y负相关前五的特征有:{negative_corr_features}'))
+            draw_feature_trend(feature_data_df, negative_corr_features)
+        elif len(negative_corr_features) == 0:
+            pass
+        else:
+            content.append(Graphs.draw_text(
+                f'{name}类指标中,与预测目标y负相关的特征有:{negative_corr_features}'))
+            draw_feature_trend(feature_data_df, negative_corr_features)
+        # 计算correlation_sum 第一行的相关性的绝对值的总和
+        correlation_sum = correlation_matrix.abs().sum()
+        config.logger.info(f'{name}类指标的相关性总和为:{correlation_sum}')
+        # 分组的相关性总和拼接到grouped_corr
+        goup_corr = pd.DataFrame(
+            {'指标分类': [name], '指标数量': [len(cols_subset)], '相关性总和': [correlation_sum]})
+        grouped_corr = pd.concat(
+            [grouped_corr, goup_corr], axis=0, ignore_index=True)
+
+    # 绘制相关性总和的气泡图
+    config.logger.info(f'开始绘制相关性总和的气泡图')
+    plt.figure(figsize=(10, 10))
+    sns.scatterplot(data=grouped_corr, x='相关性总和', y='指标数量', size='相关性总和', sizes=(
+        grouped_corr['相关性总和'].min()*5, grouped_corr['相关性总和'].max()*5), hue='指标分类', palette='viridis')
+    plt.title('指标分类相关性总和的气泡图')
+    plt.ylabel('数量')
+    plt.savefig(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png'),
+                bbox_inches='tight')
+    plt.close()
+    content.append(Graphs.draw_img(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png')))
+    content.append(Graphs.draw_text(
+        '气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
+    config.logger.info(f'绘制相关性总和的气泡图结束')
+    content.append(Graphs.draw_little_title('模型选择:'))
+    content.append(Graphs.draw_text(
+        f'预测使用了{num_models}个模型进行训练拟合,通过评估指标MAE从小到大排列,前5个模型的简介如下:'))
+    # 读取模型简介
+    with open(os.path.join(config.dataset, 'model_introduction.txt'), 'r', encoding='utf-8') as f:
+        for line in f:
+            line_split = line.strip().split('--')
+            if line_split[0] in fivemodels_list:
+                for introduction in line_split:
+                    content.append(Graphs.draw_text(introduction))
+    content.append(Graphs.draw_little_title('模型评估:'))
+    df = pd.read_csv(os.path.join(
+        config.dataset, 'model_evaluation.csv'), encoding='utf-8')
+    # 判断 df 的数值列转为float
+    for col in eval_df.columns:
+        if col not in ['模型(Model)']:
+            eval_df[col] = eval_df[col].astype(float)
+            eval_df[col] = eval_df[col].round(3)
+    # 筛选 fivemodels_list.tolist() 的行
+    eval_df = eval_df[eval_df['模型(Model)'].isin(fivemodels_list)]
+    # df转置
+    eval_df = eval_df.T
+    # df重置索引
+    eval_df = eval_df.reset_index()
+    eval_df = eval_df.T
+    # # 添加表格
+    data = eval_df.values.tolist()
+    col_width = 500/len(eval_df.columns)
+    content.append(Graphs.draw_table(col_width, *data))
+    content.append(Graphs.draw_text('评估指标释义:'))
+    content.append(Graphs.draw_text(
+        '1. 均方根误差(RMSE):均方根误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
+    content.append(Graphs.draw_text(
+        '2. 平均绝对误差(MAE):平均绝对误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
+    content.append(Graphs.draw_text(
+        '3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
+    content.append(Graphs.draw_text('模型拟合:'))
+    # 添加图片
+    content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
+    # 生成pdf文件
+    doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
+    doc.build(content)
+    # pdf 上传到数字化信息平台
+    try:
+        if config.is_update_report:
+            with open(os.path.join(config.dataset, reportname), 'rb') as f:
+                base64_data = base64.b64encode(f.read()).decode('utf-8')
+                upload_data["data"]["fileBase64"] = base64_data
+                upload_data["data"]["fileName"] = reportname
+                token = get_head_auth_report()
+                upload_report_data(token, upload_data)
+    except TimeoutError as e:
+        print(f"请求超时: {e}")
+
+
 @exception_logger
 def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, inputsize=5, dataset='dataset', time='2024-07-30', reportname='report.pdf', sqlitedb='jbsh_yuanyou.db'):
     # 创建内容对应的空列表
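The 偏差率 (bias rate) table in the new exporter is plain percentage deviation, |prediction - actual| / actual * 100, rounded to two decimals. A worked example with made-up numbers for a single model column:

import pandas as pd

df4 = pd.DataFrame({'ds': ['2025-01-02', '2025-01-03'],
                    'y': [100.0, 80.0],        # actual values
                    'NHITS': [95.0, 84.0]})    # one model's predictions

df3 = pd.DataFrame({'ds': df4['ds']})
for col in ['NHITS']:
    df3[col] = round(abs(df4[col] - df4['y']) / df4['y'] * 100, 2)
print(df3)  # both days deviate by exactly 5.00%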
test/ETA获取数据.py: 667 lines changed (file diff suppressed because it is too large)
成品油eta数据.xlsx: BIN (binary file not shown)
沥青eta数据.xlsx: BIN (binary file not shown)