添加线上环境配置

This commit is contained in:
workpc 2024-12-27 14:15:20 +08:00
parent 48bea095a0
commit 9f209d0b3d
13 changed files with 2532 additions and 2521 deletions

View File

@ -2,165 +2,48 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "9048d286-415f-4a33-be3a-9d7099aa85de",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from config_jingbo import *"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "07e338e7-1dd1-417f-b4e2-65d0efc983d6",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 33,
"id": "659eec10-f0b8-4d65-b70e-6f07f45291bc",
"metadata": {},
"outputs": [],
"source": [
"y_last_update_time = df[df['feature']=='y']['warning_date'].values[0]\n"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "31035e6f-36e8-4b17-a14b-f7dd79ae085a",
"execution_count": 6,
"id": "b3cde8ea",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>last_update_time</th>\n",
" <th>feature</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>500</th>\n",
" <td>2024-11-14</td>\n",
" <td>lmcads03 lme comdty</td>\n",
" </tr>\n",
" <tr>\n",
" <th>251</th>\n",
" <td>2024-11-14</td>\n",
" <td>美国MEH轻原油现货价格美湾</td>\n",
" </tr>\n",
" <tr>\n",
" <th>231</th>\n",
" <td>2024-11-14</td>\n",
" <td>WTI原油Cushing-C1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>264</th>\n",
" <td>2024-11-14</td>\n",
" <td>LLS原油现货价格</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>2024-11-14</td>\n",
" <td>OMAN原油中质远期现货DES价山东</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>2023-01-03</td>\n",
" <td>PVM Dubai EFS/Swap vs ICE First Month Brent</td>\n",
" </tr>\n",
" <tr>\n",
" <th>91</th>\n",
" <td>2022-11-16</td>\n",
" <td>Brent/迪拜原油升贴水</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94</th>\n",
" <td>2022-01-31</td>\n",
" <td>W-B活跃合约价差</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>2022-01-31</td>\n",
" <td>B-W活跃合约价差</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>2018-10-24</td>\n",
" <td>西北欧即期Brent 211利润</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>73 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" last_update_time feature\n",
"500 2024-11-14 lmcads03 lme comdty\n",
"251 2024-11-14 美国MEH轻原油现货价格美湾\n",
"231 2024-11-14 WTI原油Cushing-C1\n",
"264 2024-11-14 LLS原油现货价格\n",
"82 2024-11-14 OMAN原油中质远期现货DES价山东\n",
".. ... ...\n",
"83 2023-01-03 PVM Dubai EFS/Swap vs ICE First Month Brent\n",
"91 2022-11-16 Brent/迪拜原油升贴水\n",
"94 2022-01-31 W-B活跃合约价差\n",
"95 2022-01-31 B-W活跃合约价差\n",
"31 2018-10-24 西北欧即期Brent 211利润\n",
"\n",
"[73 rows x 2 columns]"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"{'ovx index': '原油波动率', 'dxy curncy': '美元指数', 'C2403128043': 'Brent连1合约价格拟合残差/美元指数', 'C2403150124': 'Brent连1合约价格拟合残差/Brent 连2-连3', 'DOESCRUD Index': '美国商业原油库存', 'FVHCM1 INDEX': '美国取暖油裂解C1', 'doedtprd index': '美国成品油表需', 'CFFDQMMN INDEX': 'WTI管理资金净多持仓', 'C2403083739': 'WTI基金多空持仓比', 'C2404167878': 'WTI基金净持仓COT指标代码运算', 'lmcads03 lme comdty': 'LME铜价', 'GC1 COMB Comdty': '黄金连1合约', 'C2404167855': '金油比'}\n"
]
}
],
"source": [
"df[df['warning_date']<y_last_update_time][['last_update_time','feature']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "937a9adb-1652-4d6b-a5c7-828d7c17769d",
"metadata": {},
"outputs": [],
"source": [
"df2 = pd.read_csv(os.path.join(dataset,''))"
"data = \"\"\"\n",
"ovx index 原油波动率\n",
"dxy curncy 美元指数\n",
"C2403128043 Brent连1合约价格拟合残差/美元指数\n",
"C2403150124 Brent连1合约价格拟合残差/Brent 连2-连3\n",
"DOESCRUD Index 美国商业原油库存\n",
"FVHCM1 INDEX 美国取暖油裂解C1\n",
"doedtprd index 美国成品油表需\n",
"CFFDQMMN INDEX WTI管理资金净多持仓\n",
"C2403083739 WTI基金多空持仓比\n",
"C2404167878 WTI基金净持仓COT指标代码运算\n",
"lmcads03 lme comdty LME铜价\n",
"GC1 COMB Comdty 黄金连1合约\n",
"C2404167855 金油比\n",
"\"\"\"\n",
"\n",
"result_dict = {}\n",
"lines = data.strip().split('\\n')\n",
"for line in lines:\n",
" key, value = line.strip().split(' ')\n",
" result_dict[key] = value\n",
"\n",
"print(result_dict)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "base",
"language": "python",
"name": "python3"
},

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -471,22 +471,23 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20241217\n"
"20241224\n",
"20241225\n"
]
}
],
"source": [
"from datetime import datetime, timedelta\n",
"\n",
"start_date = datetime(2024, 12, 17)\n",
"end_date = datetime(2024, 12, 18)\n",
"start_date = datetime(2024, 12, 24)\n",
"end_date = datetime(2024, 12, 26)\n",
"\n",
"while start_date < end_date:\n",
" print(start_date.strftime('%Y%m%d'))\n",

View File

@ -795,14 +795,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"20241217\n"
"20241223\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_4500\\2239815117.py:299: UserWarning:\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:299: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n"
@ -812,7 +812,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Using matplotlib backend: <object object at 0x0000017F4168F090>\n",
"Using matplotlib backend: <object object at 0x0000027F8B613090>\n",
"%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
"Populating the interactive namespace from numpy and matplotlib\n",
"Fitting 3 folds for each of 180 candidates, totalling 540 fits\n"
@ -824,7 +824,7 @@
"text": [
"d:\\ProgramData\\anaconda3\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:162: UserWarning:\n",
"\n",
"pylab import has clobbered these variables: ['__version__', 'plot', 'random', 'datetime']\n",
"pylab import has clobbered these variables: ['__version__', 'random', 'datetime', 'plot']\n",
"`%matplotlib` prevents importing * from pylab and numpy\n",
"\n"
]
@ -837,18 +837,18 @@
"Best parameters set:\n",
"\tlearning_rate: 0.1\n",
"\tmax_depth: 8\n",
"\tn_estimators: 100\n"
"\tn_estimators: 90\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_4500\\2239815117.py:239: UserWarning:\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:239: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_4500\\2239815117.py:273: FutureWarning:\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:273: FutureWarning:\n",
"\n",
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
"\n"
@ -859,18 +859,69 @@
"output_type": "stream",
"text": [
"日期\n",
"2024-12-17 3501.835693\n",
"2024-12-23 3503.160645\n",
"Name: 日度预测价格, dtype: float32\n",
"{\"confirmFlg\":false,\"status\":true}\n",
"新增数据: ['2024-12-17', 7957.0, 6984.0, 0.0, 0.25, 3650.0, 0.55, 0.0, 0.0, 3520.0, 7.9, 0.2, 0.2, 3500.0, 0.75, '', 3500.0, 74.35, 72.69, '', 3522.0, 28.8998, '', '', '', 13.58990112, 229522.1, 7722.02, 3511.1707, '', '', 40514.8218813, 7088.86, '']\n"
"新增数据: ['2024-12-23', 7957.0, 6904.0, 0.08, 0.25, 3650.0, 1.54, 0.0, 0.0, 3500.0, 7.9, 0.1, 0.2, 3500.0, 1.05, '', 3500.0, 72.6, '', '', 3538.0, 27.0525, '', '', '', '', 229522.1, 8639.74, 3463.8854, '', '', 40121.2216621, 7423.12, '']\n",
"20241224\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:299: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using matplotlib backend: QtAgg\n",
"%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
"Populating the interactive namespace from numpy and matplotlib\n",
"Fitting 3 folds for each of 180 candidates, totalling 540 fits\n",
"Best score: 0.997\n",
"Best parameters set:\n",
"\tlearning_rate: 0.1\n",
"\tmax_depth: 10\n",
"\tn_estimators: 100\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:239: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:273: FutureWarning:\n",
"\n",
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"日期\n",
"2024-12-24 3499.874512\n",
"Name: 日度预测价格, dtype: float32\n",
"{\"confirmFlg\":false,\"status\":true}\n",
"新增数据: ['2024-12-24', 7984.0, 6904.0, 0.08, 0.25, 3650.0, 1.71, 0.0, 0.0, 3500.0, 7.9, 0.1, 0.2, 3500.0, 1.15, '', 3500.0, 72.6, 72.67, '', 3521.0, 25.6158, '', '', '', 13.33799789, 229522.1, 5417.02, 3427.8064, '', 1000.0, 44319.2299367, '', 3650.0]\n"
]
}
],
"source": [
"from datetime import datetime, timedelta\n",
"\n",
"start_date = datetime(2024, 12, 17)\n",
"end_date = datetime(2024, 12, 18)\n",
"start_date = datetime(2024, 12, 23)\n",
"end_date = datetime(2024, 12, 25)\n",
"\n",
"while start_date < end_date:\n",
" print(start_date.strftime('%Y%m%d'))\n",

View File

@ -38,6 +38,21 @@ edbnamelist = [
# '新加坡(含硫0.05%) 柴油现货价','柴油10ppm国际市场FOB中间价新加坡','Bloomberg Commodity Fair Value Singapore Mogas 92 Swap Month 1','97#汽油FOB新加坡现货价','无铅汽油97#国际市场FOB中间价新加坡'
]
# Lookup table: indicator code string -> Chinese display name of the indicator.
# Insertion order is kept identical to the original listing.
edbcodenamedict = {
    'ovx index': '原油波动率',
    'dxy curncy': '美元指数',
    'C2403128043': 'Brent连1合约价格拟合残差/美元指数',
    'C2403150124': 'Brent连1合约价格拟合残差/Brent 连2-连3',
    'DOESCRUD Index': '美国商业原油库存',
    'FVHCM1 INDEX': '美国取暖油裂解C1',
    'doedtprd index': '美国成品油表需',
    'CFFDQMMN INDEX': 'WTI管理资金净多持仓',
    'C2403083739': 'WTI基金多空持仓比',
    'C2404167878': 'WTI基金净持仓COT指标代码运算',
    'lmcads03 lme comdty': 'LME铜价',
    'GC1 COMB Comdty': '黄金连1合约',
    'C2404167855': '金油比',
}
# eta自有数据指标编码
@ -94,6 +109,7 @@ ClassifyId = 1214
# login_pushreport_url = "http://10.200.32.39/jingbo-api/api/server/login"
# upload_url = "http://10.200.32.39/jingbo-api/api/analysis/reportInfo/researchUploadReportSave"
# upload_warning_url = "http://10.200.32.39/jingbo-api/api/basicBuiness/crudeOilWarning/save"
# query_data_list_item_nos_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryDataListItemNos"
# login_data = {
# "data": {
@ -134,6 +150,18 @@ ClassifyId = 1214
# }
# }
# query_data_list_item_nos_data = {
# "funcModule": "数据项",
# "funcOperation": "查询",
# "data": {
# "dateStart":"20200101",
# "dateEnd":"20241231",
# "dataItemNoList":["Brentzdj","Brentzgj"] # 数据项编码,代表 brent最低价和最高价
# }
# }
# # 生产环境数据库
# host = 'rm-2zehj3r1n60ttz9x5.mysql.rds.aliyuncs.com'
# port = 3306
@ -236,7 +264,7 @@ table_name = 'v_tbl_crude_oil_warning'
### 开关
is_train = False # 是否训练
is_debug = False # 是否调试
is_eta = True # 是否使用eta接口
is_eta = False # 是否使用eta接口
is_market = True # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效
is_timefurture = True # 是否使用时间特征
is_fivemodels = False # 是否使用之前保存的最佳的5个模型

4478
data.csv

File diff suppressed because it is too large Load Diff

View File

@ -1341,6 +1341,7 @@ class EtaReader():
Frequency = i.get('Frequency') # 频度要保存到df的频度列
SourceName = i.get('SourceName') # 来源名称要保存到df的频度列
Source = i.get('Source') # 来源ID要保存到df的频度列
Unit = i.get('Unit') # 单位要保存到df的单位列
# 频度不是 日 或者 周的 跳过
if Frequency not in ['日度','周度','','']:
continue
@ -1390,7 +1391,18 @@ class EtaReader():
stop_update_period = (datetime.datetime.strptime(today, "%Y-%m-%d") - datetime.datetime.strptime(time_sequence[-1], "%Y-%m-%d")).days
# 保存频度 指标名称 分类 指标id 到 df
df2 = pd.DataFrame({'指标分类': ClassifyName, '指标名称': EdbName, '指标id': EdbCode, '频度': Frequency,'指标来源':SourceName,'来源id':Source,'最后更新时间':edbname_df['DataTime'].values[-1],'更新周期':most_common_weekday,'预警日期':warning_date,'停更周期':stop_update_period},index=[0])
df2 = pd.DataFrame({'指标分类': ClassifyName,
'指标名称': EdbName,
'指标id': EdbCode,
'单位': Unit,
'频度': Frequency,
'指标来源':SourceName,
'来源id':Source,
'最后更新时间':edbname_df['DataTime'].values[-1],
'更新周期':most_common_weekday,
'预警日期':warning_date,
'停更周期':stop_update_period},index=[0],
)
# df = pd.merge(df, df2, how='outer')
df = pd.concat([df, df2])

View File

@ -549,7 +549,18 @@ def exception_logger(func):
raise e # 重新抛出异常
return wrapper
def get_week_date(end_time):
    """Build the creation-date and target-date lists for the week before end_time.

    A 14-day window is laid out starting on the Monday two weeks before the
    week that contains ``end_time``. Two slices of that window are returned
    as ``'%Y-%m-%d'`` strings:

    * ``create_dates`` - seven consecutive calendar days, from the Friday of
      the week two weeks back through the Thursday of the previous week
      (the Saturday and Sunday in between are included).
    * ``ds_dates`` - the Monday through Friday of the previous week.

    Args:
        end_time: Reference date as a ``'%Y-%m-%d'`` string.

    Returns:
        A ``(create_dates, ds_dates)`` tuple of two lists of date strings.

    Raises:
        ValueError: If ``end_time`` does not match ``'%Y-%m-%d'``
            (raised by ``datetime.strptime``).
    """
    # Imported locally so the function does not depend on how the enclosing
    # module happens to import ``datetime``.
    import datetime

    day_format = '%Y-%m-%d'
    anchor = datetime.datetime.strptime(end_time, day_format)

    # Monday of the week that lies two weeks before the anchor's week.
    window_start = anchor - datetime.timedelta(days=anchor.weekday() + 14)

    def _format_span(first_offset, stop_offset):
        # Format the days window_start+first_offset .. window_start+stop_offset-1.
        return [
            (window_start + datetime.timedelta(days=offset)).strftime(day_format)
            for offset in range(first_offset, stop_offset)
        ]

    # Offsets 4..10 of the 14-day window (the original slice [4:-3]).
    create_dates = _format_span(4, 11)
    # Offsets 7..11 of the 14-day window (the original slice [-7:-2]).
    ds_dates = _format_span(7, 12)
    return create_dates, ds_dates
if __name__ == '__main__':
print('This is a tool, not a script.')

View File

@ -106,12 +106,13 @@ def predict_main():
# 保存最新日期的y值到数据库
# 取第一行数据存储到数据库中
first_row = df[['ds', 'y']].tail(1)
# 判断ds是否与ent_time 一致且 y 不为空
if first_row['ds'].values[0] == end_time and not np.isnan(first_row['y'].values[0]):
pass
else:
print(first_row['ds'].values[0])
print(first_row['y'].values[0])
# 判断y的类型是否为float
if not isinstance(first_row['y'].values[0], float):
logger.info(f'{end_time}预测目标数据为空,跳过')
return None
# 将最新真实值保存到数据库
if not sqlitedb.check_table_exists('trueandpredict'):
first_row.to_sql('trueandpredict', sqlitedb.connection, index=False)
@ -135,10 +136,16 @@ def predict_main():
logger.info('更新accuracy表的y值')
# 找到update_y 中ds且df中的y的行
update_y = update_y[update_y['ds']<=end_time]
for row in update_y.itertuples(index=False):
row_dict = row._asdict()
yy = df[df['ds']==row_dict['ds']]['y'].values[0]
sqlitedb.update_data('accuracy', f"y = {yy}", where_condition=f"ds = '{row_dict['ds']}'")
logger.info(f'要更新y的信息{update_y}')
try:
for row in update_y.itertuples(index=False):
row_dict = row._asdict()
yy = df[df['ds']==row_dict['ds']]['y'].values[0]
LOW = df[df['ds']==row_dict['ds']]['Brentzdj'].values[0]
HIGH = df[df['ds']==row_dict['ds']]['Brentzgj'].values[0]
sqlitedb.update_data('accuracy', f"y = {yy},LOW_PRICE = {LOW},HIGH_PRICE = {HIGH}", where_condition=f"ds = '{row_dict['ds']}'")
except Exception as e:
logger.info(f'更新accuracy表的y值失败{e}')
import datetime
# 判断当前日期是不是周一
@ -240,14 +247,14 @@ def predict_main():
logger.info('训练数据绘图end')
# 模型报告
# logger.info('制作报告ing')
# title = f'{settings}--{now}-预测报告' # 报告标题
logger.info('制作报告ing')
title = f'{settings}--{end_time}-预测报告' # 报告标题
# brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
# reportname=reportname,sqlitedb=sqlitedb),
brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
reportname=reportname,sqlitedb=sqlitedb),
# logger.info('制作报告end')
# logger.info('模型训练完成')
logger.info('制作报告end')
logger.info('模型训练完成')
# # LSTM 单变量模型
# ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
@ -275,11 +282,6 @@ if __name__ == '__main__':
global end_time
is_on = True
# Iterate over every business day (freq='B') in the date range passed to pd.date_range below
for i_time in pd.date_range('2024-12-24', '2024-12-26', freq='B'):
for i_time in pd.date_range('2024-12-24', '2024-12-25', freq='B'):
end_time = i_time.strftime('%Y-%m-%d')
predict_main()
if is_on:
is_train = False
is_on = False
is_fivemodels = True
is_eta = False

View File

@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
from lib.tools import Graphs,mse,rmse,mae,exception_logger
from lib.tools import save_to_database
from lib.tools import save_to_database,get_week_date
from lib.dataread import *
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS,Informer, NBEATSx,LSTM,PatchTST, iTransformer, TSMixer
@ -251,7 +251,7 @@ def model_losss(sqlitedb,end_time):
df_combined = df_combined[df_combined['CREAT_DATE'] == df_combined['max_cutoff']]
df_combined4 = df_combined.copy() # 备份df_combined,后面画图需要
# 删除模型生成的cutoff列
df_combined.drop(columns=['CREAT_DATE', 'max_cutoff','created_dt','min_within_quantile','max_within_quantile','id','min_price','max_price'], inplace=True)
df_combined.drop(columns=['CREAT_DATE', 'max_cutoff','created_dt','min_within_quantile','max_within_quantile','id','min_price','max_price','LOW_PRICE','HIGH_PRICE'], inplace=True)
# 获取模型名称
modelnames = df_combined.columns.to_list()[1:]
if 'y' in modelnames:
@ -313,30 +313,6 @@ def model_losss(sqlitedb,end_time):
df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')
except ValueError :
df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')
# def first_row_to_database(df):
# # # 取第一行数据存储到数据库中
# first_row = df.head(1)
# first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00')
# # 将预测结果保存到数据库
# if not sqlitedb.check_table_exists('trueandpredict'):
# first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
# else:
# for col in first_row.columns:
# sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
# for row in first_row.itertuples(index=False):
# row_dict = row._asdict()
# columns=row_dict.keys()
# check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
# if len(check_query) > 0:
# set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
# sqlitedb.update_data('trueandpredict',set_clause,where_condition = f"ds = '{row.ds}'")
# continue
# sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=columns)
# first_row_to_database(df_predict)
df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)
@ -408,7 +384,7 @@ def model_losss(sqlitedb,end_time):
# 保存到数据库
if not sqlitedb.check_table_exists('accuracy'):
columns = ','.join(df_combined3.columns.to_list()+['id','CREAT_DATE','min_price','max_price'])
columns = ','.join(df_combined3.columns.to_list()+['id','CREAT_DATE','min_price','max_price','LOW_PRICE','HIGH_PRICE','mean'])
sqlitedb.create_table('accuracy',columns=columns)
existing_data = sqlitedb.select_data(table_name = "accuracy")
@ -422,7 +398,8 @@ def model_losss(sqlitedb,end_time):
save_to_database(sqlitedb,df_predict2,"accuracy",end_time)
# 上周准确率计算
predict_y = sqlitedb.select_data(table_name = "accuracy")
accuracy_df = sqlitedb.select_data(table_name = "accuracy")
predict_y = accuracy_df.copy()
# ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist()
ids = predict_y['id'].tolist()
# 准确率基准与绘图上下界逻辑一致
@ -432,6 +409,7 @@ def model_losss(sqlitedb,end_time):
# predict_y['max_price'] = predict_y[modelnames].mean(axis=1) +1
# 模型评估前十均值
predict_y['min_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) -1.5
predict_y['mean'] = predict_y[allmodelnames[0:10]].mean(axis=1)
predict_y['max_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) +1.5
# 模型评估前十最大最小
# allmodelnames 和 predict_y 列 重复的
@ -441,17 +419,11 @@ def model_losss(sqlitedb,end_time):
for id in ids:
row = predict_y[predict_y['id'] == id]
try:
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}",f"id = {id}")
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean']}",f"id = {id}")
except:
logger.error(f'更新accuracy表中的min_price,max_price值失败row={row}')
logger.error(f'更新accuracy表中的min_price,max_price,mean值失败row={row}')
# 拼接市场最高最低价
xlsfilename = os.path.join(dataset,'数据项下载.xls')
df2 = pd.read_excel(xlsfilename)[5:]
df2 = df2.rename(columns = {'数据项名称':'ds','布伦特最低价':'LOW_PRICE','布伦特最高价':'HIGH_PRICE'})
print(df2.shape)
df = pd.merge(predict_y,df2,on=['ds'],how='left')
df = accuracy_df.copy()
df['ds'] = pd.to_datetime(df['ds'])
df = df.reindex()
@ -486,15 +458,6 @@ def model_losss(sqlitedb,end_time):
df[columns] = df[columns].astype(float)
df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)
# df['ACCURACY'] = df.apply(is_within_range, axis=1)
# 取结束日期上一周的日期
def get_week_date(end_time):
endtime = end_time
endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d')
up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14)
up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)]
create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]]
ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]]
return create_dates,ds_dates
# 计算准确率并保存结果
def _get_accuracy_rate(df,create_dates,ds_dates,endtime):
@ -503,10 +466,6 @@ def model_losss(sqlitedb,end_time):
df3 = df3[df3['ds'].isin(ds_dates)]
accuracy_rote = 0
for i,group in df3.groupby('CREAT_DATE'):
# print('日期:',i)
# print(group)
# print('权重:',weight_dict[len(group)-1])
# print('准确率:',(group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1])
accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]
df3.to_csv(os.path.join(dataset,f'accuracy_{endtime}.csv'),index=False)
df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])
@ -548,8 +507,6 @@ def model_losss(sqlitedb,end_time):
# 历史价格+预测价格
sqlitedb.drop_table('testandpredict_groupby')
df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)
# 新增均值列
df_combined3['mean'] = df_combined3[modelnames].mean(axis=1)
def _plt_predict_ture(df):
lens = df.shape[0] if df.shape[0] < 180 else 90
@ -557,15 +514,13 @@ def model_losss(sqlitedb,end_time):
# 历史价格
plt.figure(figsize=(20, 10))
plt.plot(df['ds'], df['y'], label='真实值')
# 均值线
plt.plot(df['ds'], df['mean'], color='r', linestyle='--', label='前五模型预测均值')
# 颜色填充
plt.fill_between(df['ds'], df['max_within_quantile'], df['min_within_quantile'], alpha=0.2)
markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']
random_marker = random.choice(markers)
for model in modelnames:
# markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']
# random_marker = random.choice(markers)
# for model in allmodelnames:
# for model in ['BiTCN','RNN']:
plt.plot(df['ds'][-horizon:], df[model][-horizon:], label=model,marker=random_marker)
# plt.plot(df['ds'], df[model], label=model,marker=random_marker)
# plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')
# 网格
plt.grid(True)
@ -573,15 +528,15 @@ def model_losss(sqlitedb,end_time):
for i, j in zip(df['ds'], df['y']):
plt.text(i, j, str(j), ha='center', va='bottom')
# for model in most_model:
# plt.plot(df['ds'], df[model], label=model,marker='o')
for model in most_model:
plt.plot(df['ds'], df[model], label=model,marker='o')
# 当前日期画竖虚线
plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')
plt.legend()
plt.xlabel('日期')
plt.ylabel('价格')
plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')
plt.savefig(os.path.join(dataset,f'{end_time}历史价格-预测值.png'), bbox_inches='tight')
plt.close()
@ -613,7 +568,7 @@ def model_losss(sqlitedb,end_time):
plt.xlabel('日期')
plt.ylabel('价格')
plt.savefig(os.path.join(dataset,'历史价格-预测值1.png'), bbox_inches='tight')
plt.savefig(os.path.join(dataset,f'{end_time}历史价格-预测值1.png'), bbox_inches='tight')
plt.close()
@ -997,6 +952,9 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
content.append(Graphs.draw_text('4. 预测结果没有真实值作为参考依据通道上界取近60个交易日内预测在上界值的模型对应的预测值通道下界同理'))
content.append(Graphs.draw_text('5. 预测结果选用近20个交易日内最多接近真实值的模型的预测值对应的预测结果'))
content.append(Graphs.draw_text('6. 预测结果在通道外的,代表最接近真实值的预测结果不在置信波动范围内。'))
# 添加历史走势及预测价格的走势图片
content.append(Graphs.draw_img(os.path.join(dataset,'历史价格-预测值1.png')))
# 取df中y列为空的行

File diff suppressed because one or more lines are too long