PriceForecast/聚烯烃预测值绘图调试.ipynb
2024-11-20 14:41:17 +08:00

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "7fadc60c-d710-4b8c-89cd-1d889ece1eaf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From C:\\Users\\EDY\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"从eta获取数据...\n",
"从eta获取数据...\n"
]
},
{
"ename": "Exception",
"evalue": "(\"请求失败,请确认是否为内网环境: HTTPConnectionPool(host='10.189.2.78', port=8108): Max retries exceeded with url: /v1/edb/classify/list?ClassifyType= (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000028116365890>, 'Connection to 10.189.2.78 timed out. (connect timeout=None)'))\", '\\x1b[0m')",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTimeoutError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connection.py:203\u001b[0m, in \u001b[0;36mHTTPConnection._new_conn\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 202\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 203\u001b[0m sock \u001b[38;5;241m=\u001b[39m connection\u001b[38;5;241m.\u001b[39mcreate_connection(\n\u001b[0;32m 204\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dns_host, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mport),\n\u001b[0;32m 205\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtimeout,\n\u001b[0;32m 206\u001b[0m source_address\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msource_address,\n\u001b[0;32m 207\u001b[0m socket_options\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msocket_options,\n\u001b[0;32m 208\u001b[0m )\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m socket\u001b[38;5;241m.\u001b[39mgaierror \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\util\\connection.py:85\u001b[0m, in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address, socket_options)\u001b[0m\n\u001b[0;32m 84\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 85\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n\u001b[0;32m 86\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 87\u001b[0m \u001b[38;5;66;03m# Break explicitly a reference cycle\u001b[39;00m\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\util\\connection.py:73\u001b[0m, in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address, socket_options)\u001b[0m\n\u001b[0;32m 72\u001b[0m sock\u001b[38;5;241m.\u001b[39mbind(source_address)\n\u001b[1;32m---> 73\u001b[0m sock\u001b[38;5;241m.\u001b[39mconnect(sa)\n\u001b[0;32m 74\u001b[0m \u001b[38;5;66;03m# Break explicitly a reference cycle\u001b[39;00m\n",
"\u001b[1;31mTimeoutError\u001b[0m: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mConnectTimeoutError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connectionpool.py:791\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[0;32m 790\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[1;32m--> 791\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_request(\n\u001b[0;32m 792\u001b[0m conn,\n\u001b[0;32m 793\u001b[0m method,\n\u001b[0;32m 794\u001b[0m url,\n\u001b[0;32m 795\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout_obj,\n\u001b[0;32m 796\u001b[0m body\u001b[38;5;241m=\u001b[39mbody,\n\u001b[0;32m 797\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 798\u001b[0m chunked\u001b[38;5;241m=\u001b[39mchunked,\n\u001b[0;32m 799\u001b[0m retries\u001b[38;5;241m=\u001b[39mretries,\n\u001b[0;32m 800\u001b[0m response_conn\u001b[38;5;241m=\u001b[39mresponse_conn,\n\u001b[0;32m 801\u001b[0m preload_content\u001b[38;5;241m=\u001b[39mpreload_content,\n\u001b[0;32m 802\u001b[0m decode_content\u001b[38;5;241m=\u001b[39mdecode_content,\n\u001b[0;32m 803\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mresponse_kw,\n\u001b[0;32m 804\u001b[0m )\n\u001b[0;32m 806\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connectionpool.py:497\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[1;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[0;32m 496\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 497\u001b[0m conn\u001b[38;5;241m.\u001b[39mrequest(\n\u001b[0;32m 498\u001b[0m method,\n\u001b[0;32m 499\u001b[0m url,\n\u001b[0;32m 500\u001b[0m body\u001b[38;5;241m=\u001b[39mbody,\n\u001b[0;32m 501\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 502\u001b[0m chunked\u001b[38;5;241m=\u001b[39mchunked,\n\u001b[0;32m 503\u001b[0m preload_content\u001b[38;5;241m=\u001b[39mpreload_content,\n\u001b[0;32m 504\u001b[0m decode_content\u001b[38;5;241m=\u001b[39mdecode_content,\n\u001b[0;32m 505\u001b[0m enforce_content_length\u001b[38;5;241m=\u001b[39menforce_content_length,\n\u001b[0;32m 506\u001b[0m )\n\u001b[0;32m 508\u001b[0m \u001b[38;5;66;03m# We are swallowing BrokenPipeError (errno.EPIPE) since the server is\u001b[39;00m\n\u001b[0;32m 509\u001b[0m \u001b[38;5;66;03m# legitimately able to close the connection after sending a valid response.\u001b[39;00m\n\u001b[0;32m 510\u001b[0m \u001b[38;5;66;03m# With this behaviour, the received response is still readable.\u001b[39;00m\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connection.py:395\u001b[0m, in \u001b[0;36mHTTPConnection.request\u001b[1;34m(self, method, url, body, headers, chunked, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[0;32m 394\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mputheader(header, value)\n\u001b[1;32m--> 395\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mendheaders()\n\u001b[0;32m 397\u001b[0m \u001b[38;5;66;03m# If we're given a body we start sending that in chunks.\u001b[39;00m\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\http\\client.py:1289\u001b[0m, in \u001b[0;36mHTTPConnection.endheaders\u001b[1;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[0;32m 1288\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CannotSendHeader()\n\u001b[1;32m-> 1289\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_send_output(message_body, encode_chunked\u001b[38;5;241m=\u001b[39mencode_chunked)\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\http\\client.py:1048\u001b[0m, in \u001b[0;36mHTTPConnection._send_output\u001b[1;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[0;32m 1047\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_buffer[:]\n\u001b[1;32m-> 1048\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msend(msg)\n\u001b[0;32m 1050\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m message_body \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 1051\u001b[0m \n\u001b[0;32m 1052\u001b[0m \u001b[38;5;66;03m# create a consistent interface to message_body\u001b[39;00m\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\http\\client.py:986\u001b[0m, in \u001b[0;36mHTTPConnection.send\u001b[1;34m(self, data)\u001b[0m\n\u001b[0;32m 985\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mauto_open:\n\u001b[1;32m--> 986\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconnect()\n\u001b[0;32m 987\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connection.py:243\u001b[0m, in \u001b[0;36mHTTPConnection.connect\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 242\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconnect\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 243\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msock \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_new_conn()\n\u001b[0;32m 244\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tunnel_host:\n\u001b[0;32m 245\u001b[0m \u001b[38;5;66;03m# If we're tunneling it means we're connected to our proxy.\u001b[39;00m\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connection.py:212\u001b[0m, in \u001b[0;36mHTTPConnection._new_conn\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 211\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SocketTimeout \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 212\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ConnectTimeoutError(\n\u001b[0;32m 213\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 214\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConnection to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhost\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m timed out. (connect timeout=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtimeout\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 215\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 217\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"\u001b[1;31mConnectTimeoutError\u001b[0m: (<urllib3.connection.HTTPConnection object at 0x0000028116365890>, 'Connection to 10.189.2.78 timed out. (connect timeout=None)')",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mMaxRetryError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[0;32m 485\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 486\u001b[0m resp \u001b[38;5;241m=\u001b[39m conn\u001b[38;5;241m.\u001b[39murlopen(\n\u001b[0;32m 487\u001b[0m method\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mmethod,\n\u001b[0;32m 488\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 489\u001b[0m body\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mbody,\n\u001b[0;32m 490\u001b[0m headers\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mheaders,\n\u001b[0;32m 491\u001b[0m redirect\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 492\u001b[0m assert_same_host\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 493\u001b[0m preload_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 494\u001b[0m decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 495\u001b[0m retries\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_retries,\n\u001b[0;32m 496\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[0;32m 497\u001b[0m chunked\u001b[38;5;241m=\u001b[39mchunked,\n\u001b[0;32m 498\u001b[0m )\n\u001b[0;32m 500\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connectionpool.py:845\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[0;32m 843\u001b[0m new_e \u001b[38;5;241m=\u001b[39m ProtocolError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConnection aborted.\u001b[39m\u001b[38;5;124m\"\u001b[39m, new_e)\n\u001b[1;32m--> 845\u001b[0m retries \u001b[38;5;241m=\u001b[39m retries\u001b[38;5;241m.\u001b[39mincrement(\n\u001b[0;32m 846\u001b[0m method, url, error\u001b[38;5;241m=\u001b[39mnew_e, _pool\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m, _stacktrace\u001b[38;5;241m=\u001b[39msys\u001b[38;5;241m.\u001b[39mexc_info()[\u001b[38;5;241m2\u001b[39m]\n\u001b[0;32m 847\u001b[0m )\n\u001b[0;32m 848\u001b[0m retries\u001b[38;5;241m.\u001b[39msleep()\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\util\\retry.py:515\u001b[0m, in \u001b[0;36mRetry.increment\u001b[1;34m(self, method, url, response, error, _pool, _stacktrace)\u001b[0m\n\u001b[0;32m 514\u001b[0m reason \u001b[38;5;241m=\u001b[39m error \u001b[38;5;129;01mor\u001b[39;00m ResponseError(cause)\n\u001b[1;32m--> 515\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m MaxRetryError(_pool, url, reason) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mreason\u001b[39;00m \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[0;32m 517\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIncremented Retry for (url=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m): \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, url, new_retry)\n",
"\u001b[1;31mMaxRetryError\u001b[0m: HTTPConnectionPool(host='10.189.2.78', port=8108): Max retries exceeded with url: /v1/edb/classify/list?ClassifyType= (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000028116365890>, 'Connection to 10.189.2.78 timed out. (connect timeout=None)'))",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mConnectTimeout\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32md:\\code\\PriceForecast\\lib\\dataread.py:1273\u001b[0m, in \u001b[0;36mEtaReader.get_eta_api_pp_data\u001b[1;34m(self, data_set, dataset)\u001b[0m\n\u001b[0;32m 1271\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1272\u001b[0m \u001b[38;5;66;03m# 发送GET请求 获取指标分类列表\u001b[39;00m\n\u001b[1;32m-> 1273\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclassifylisturl, headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mheaders) \n\u001b[0;32m 1274\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mRequestException \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\api.py:73\u001b[0m, in \u001b[0;36mget\u001b[1;34m(url, params, **kwargs)\u001b[0m\n\u001b[0;32m 63\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a GET request.\u001b[39;00m\n\u001b[0;32m 64\u001b[0m \n\u001b[0;32m 65\u001b[0m \u001b[38;5;124;03m:param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 70\u001b[0m \u001b[38;5;124;03m:rtype: requests.Response\u001b[39;00m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m request(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mget\u001b[39m\u001b[38;5;124m\"\u001b[39m, url, params\u001b[38;5;241m=\u001b[39mparams, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[1;34m(method, url, **kwargs)\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[1;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m session\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[1;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[0;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[1;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msend(prep, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39msend_kwargs)\n\u001b[0;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[1;34m(self, request, **kwargs)\u001b[0m\n\u001b[0;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[1;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m adapter\u001b[38;5;241m.\u001b[39msend(request, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\adapters.py:507\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[0;32m 506\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e\u001b[38;5;241m.\u001b[39mreason, NewConnectionError):\n\u001b[1;32m--> 507\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ConnectTimeout(e, request\u001b[38;5;241m=\u001b[39mrequest)\n\u001b[0;32m 509\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e\u001b[38;5;241m.\u001b[39mreason, ResponseError):\n",
"\u001b[1;31mConnectTimeout\u001b[0m: HTTPConnectionPool(host='10.189.2.78', port=8108): Max retries exceeded with url: /v1/edb/classify/list?ClassifyType= (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000028116365890>, 'Connection to 10.189.2.78 timed out. (connect timeout=None)'))",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[1], line 37\u001b[0m\n\u001b[0;32m 27\u001b[0m signature \u001b[38;5;241m=\u001b[39m BinanceAPI(APPID, SECRET)\n\u001b[0;32m 28\u001b[0m etadata \u001b[38;5;241m=\u001b[39m EtaReader(signature\u001b[38;5;241m=\u001b[39msignature,\n\u001b[0;32m 29\u001b[0m classifylisturl \u001b[38;5;241m=\u001b[39m classifylisturl,\n\u001b[0;32m 30\u001b[0m classifyidlisturl\u001b[38;5;241m=\u001b[39mclassifyidlisturl,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 35\u001b[0m edbbusinessurl\u001b[38;5;241m=\u001b[39medbbusinessurl,\n\u001b[0;32m 36\u001b[0m )\n\u001b[1;32m---> 37\u001b[0m df_zhibiaoshuju,df_zhibiaoliebiao \u001b[38;5;241m=\u001b[39m etadata\u001b[38;5;241m.\u001b[39mget_eta_api_pp_data(data_set\u001b[38;5;241m=\u001b[39mdata_set,dataset\u001b[38;5;241m=\u001b[39mdataset) \u001b[38;5;66;03m# 原始数据,未处理\u001b[39;00m\n\u001b[0;32m 39\u001b[0m \u001b[38;5;66;03m# 数据处理\u001b[39;00m\n\u001b[0;32m 40\u001b[0m df \u001b[38;5;241m=\u001b[39m datachuli_juxiting(df_zhibiaoshuju,df_zhibiaoliebiao,y \u001b[38;5;241m=\u001b[39m y,dataset\u001b[38;5;241m=\u001b[39mdataset,add_kdj\u001b[38;5;241m=\u001b[39madd_kdj,is_timefurture\u001b[38;5;241m=\u001b[39mis_timefurture,end_time\u001b[38;5;241m=\u001b[39mend_time) \n",
"File \u001b[1;32md:\\code\\PriceForecast\\lib\\dataread.py:1275\u001b[0m, in \u001b[0;36mEtaReader.get_eta_api_pp_data\u001b[1;34m(self, data_set, dataset)\u001b[0m\n\u001b[0;32m 1273\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclassifylisturl, headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mheaders) \n\u001b[0;32m 1274\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mRequestException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m-> 1275\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m请求失败,请确认是否为内网环境: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[0m\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 1277\u001b[0m \u001b[38;5;66;03m# 检查响应状态码 \u001b[39;00m\n\u001b[0;32m 1278\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m200\u001b[39m: \n\u001b[0;32m 1279\u001b[0m \u001b[38;5;66;03m# 获取成功, 处理响应内容\u001b[39;00m\n",
"\u001b[1;31mException\u001b[0m: (\"请求失败,请确认是否为内网环境: HTTPConnectionPool(host='10.189.2.78', port=8108): Max retries exceeded with url: /v1/edb/classify/list?ClassifyType= (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000028116365890>, 'Connection to 10.189.2.78 timed out. (connect timeout=None)'))\", '\\x1b[0m')"
]
}
],
"source": [
"# 读取配置\n",
"from lib.dataread import *\n",
"from lib.tools import *\n",
"from models.nerulforcastmodels import ex_Model,model_losss,brent_export_pdf,tansuanli_export_pdf,pp_export_pdf,model_losss_juxiting\n",
"\n",
"import glob\n",
"import torch\n",
"torch.set_float32_matmul_precision(\"high\")\n",
"\n",
"sqlitedb = SQLiteHandler(db_name) \n",
"sqlitedb.connect()\n",
"\n",
"signature = BinanceAPI(APPID, SECRET)\n",
"etadata = EtaReader(signature=signature,\n",
" classifylisturl = classifylisturl,\n",
" classifyidlisturl=classifyidlisturl,\n",
" edbcodedataurl=edbcodedataurl,\n",
" edbcodelist=edbcodelist,\n",
" edbdatapushurl=edbdatapushurl,\n",
" edbdeleteurl=edbdeleteurl,\n",
" edbbusinessurl=edbbusinessurl\n",
" )\n",
"# 获取数据\n",
"if is_eta:\n",
" # eta数据\n",
" logger.info('从eta获取数据...')\n",
" signature = BinanceAPI(APPID, SECRET)\n",
" etadata = EtaReader(signature=signature,\n",
" classifylisturl = classifylisturl,\n",
" classifyidlisturl=classifyidlisturl,\n",
" edbcodedataurl=edbcodedataurl,\n",
" edbcodelist=edbcodelist,\n",
" edbdatapushurl=edbdatapushurl,\n",
" edbdeleteurl=edbdeleteurl,\n",
" edbbusinessurl=edbbusinessurl,\n",
" )\n",
" df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set,dataset=dataset) # 原始数据,未处理\n",
"\n",
" # 数据处理\n",
" df = datachuli_juxiting(df_zhibiaoshuju,df_zhibiaoliebiao,y = y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time) \n",
"\n",
"else:\n",
" logger.info('读取本地数据:'+os.path.join(dataset,data_set))\n",
" df = getdata_juxiting(filename=os.path.join(dataset,data_set),y=y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time) # 原始数据,未处理\n",
"\n",
"# 更改预测列名称\n",
"df.rename(columns={y:'y'},inplace=True)\n",
" \n",
"if is_edbnamelist:\n",
" df = df[edbnamelist] \n",
"df.to_csv(os.path.join(dataset,'指标数据.csv'), index=False)\n",
"\n",
"\n"
]
},
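{
"cell_type": "code",
"execution_count": null,
"id": "0a1b2c3d-0001-4abc-8abc-000000000001",
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch (not part of the original pipeline): probe the ETA endpoint with a short\n",
"# timeout before constructing EtaReader, so a missing intranet connection fails fast instead of\n",
"# hanging until the OS-level connect timeout seen in the traceback above. The 3-second timeout\n",
"# is an assumption.\n",
"import requests\n",
"\n",
"def eta_reachable(url, timeout=3):\n",
"    try:\n",
"        requests.get(url, timeout=timeout)\n",
"        return True\n",
"    except requests.exceptions.RequestException:\n",
"        return False\n",
"\n",
"# if not eta_reachable(classifylisturl):\n",
"#     logger.warning('ETA接口不可达,请确认是否为内网环境')\n"
]
},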
{
"cell_type": "code",
"execution_count": null,
"id": "ae059224-976c-4839-b455-f81da7f25179",
"metadata": {},
"outputs": [],
"source": [
"# 保存最新日期的y值到数据库\n",
"# 取第一行数据存储到数据库中\n",
"first_row = df[['ds','y']].tail(1)\n",
"# 将最新真实值保存到数据库\n",
"if not sqlitedb.check_table_exists('trueandpredict'):\n",
" first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)\n",
"else:\n",
" for row in first_row.itertuples(index=False):\n",
" row_dict = row._asdict()\n",
" row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S')\n",
" check_query = sqlitedb.select_data('trueandpredict',where_condition = f\"ds = '{row.ds}'\")\n",
" if len(check_query) > 0:\n",
" set_clause = \", \".join([f\"{key} = '{value}'\" for key, value in row_dict.items()])\n",
" sqlitedb.update_data('trueandpredict',set_clause,where_condition = f\"ds = '{row.ds}'\")\n",
" continue\n",
" sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=row_dict.keys())\n",
"\n",
"\n"
]
},
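{
"cell_type": "code",
"execution_count": null,
"id": "0a1b2c3d-0002-4abc-8abc-000000000002",
"metadata": {},
"outputs": [],
"source": [
"# A self-contained sketch of the upsert pattern used above, written against plain sqlite3.\n",
"# SQLiteHandler's select_data/update_data/insert_data are project helpers; this shows the\n",
"# equivalent behaviour on a throwaway in-memory database.\n",
"import sqlite3\n",
"\n",
"conn = sqlite3.connect(':memory:')\n",
"conn.execute('CREATE TABLE trueandpredict_demo (ds TEXT PRIMARY KEY, y REAL)')\n",
"\n",
"def upsert(ds, y):\n",
"    # update when the date already exists, insert otherwise\n",
"    cur = conn.execute('SELECT 1 FROM trueandpredict_demo WHERE ds = ?', (ds,))\n",
"    if cur.fetchone():\n",
"        conn.execute('UPDATE trueandpredict_demo SET y = ? WHERE ds = ?', (y, ds))\n",
"    else:\n",
"        conn.execute('INSERT INTO trueandpredict_demo (ds, y) VALUES (?, ?)', (ds, y))\n",
"    conn.commit()\n",
"\n",
"upsert('2024-11-20 00:00:00', 7350.0)\n",
"upsert('2024-11-20 00:00:00', 7360.0)  # second call updates in place\n",
"print(conn.execute('SELECT * FROM trueandpredict_demo').fetchall())\n"
]
},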
{
"cell_type": "code",
"execution_count": null,
"id": "abb597fc-c5f3-4d76-8099-5eff358cb634",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"# 判断当前日期是不是周一\n",
"is_weekday = datetime.datetime.now().weekday() == 1\n",
"if is_weekday:\n",
" logger.info('今天是周一,更新预测模型')\n",
" # 计算最近20天预测残差最低的模型名称\n",
"\n",
" model_results = sqlitedb.select_data('trueandpredict',order_by = \"ds DESC\",limit = \"20\")\n",
" # 删除空值率为40%以上的列\n",
" print(model_results.shape)\n",
" model_results = model_results.dropna(thresh=len(model_results)*0.6,axis=1)\n",
" model_results = model_results.dropna()\n",
" print(model_results.shape)\n",
" modelnames = model_results.columns.to_list()[2:] \n",
" for col in model_results[modelnames].select_dtypes(include=['object']).columns:\n",
" model_results[col] = model_results[col].astype(np.float32)\n",
" # 计算每个预测值与真实值之间的偏差率\n",
" for model in modelnames:\n",
" model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']\n",
"\n",
" # 获取每行对应的最小偏差率值\n",
" min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)\n",
" # 获取每行对应的最小偏差率值对应的列名\n",
" min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)\n",
" print(min_abs_error_rate_column_name)\n",
" # 将列名索引转换为列名\n",
" min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])\n",
" # 取出现次数最多的模型名称\n",
" most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()\n",
" logger.info(f\"最近20天预测残差最低的模型名称{most_common_model}\")\n",
"\n",
" # 保存结果到数据库\n",
" \n",
" if not sqlitedb.check_table_exists('most_model'):\n",
" sqlitedb.create_table('most_model',columns=\"ds datetime, most_common_model TEXT\")\n",
" sqlitedb.insert_data('most_model',(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),most_common_model,),columns=('ds','most_common_model',))\n",
"\n"
]
},
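{
"cell_type": "code",
"execution_count": null,
"id": "0a1b2c3d-0003-4abc-8abc-000000000003",
"metadata": {},
"outputs": [],
"source": [
"# A self-contained sketch of the best-model vote above: per row, pick the model with the\n",
"# smallest absolute error rate against y, then take the most frequent winner. The model\n",
"# columns and numbers here are made up for illustration.\n",
"import pandas as pd\n",
"\n",
"toy = pd.DataFrame({'y':     [100.0, 102.0, 98.0],\n",
"                    'NHITS': [101.0, 101.0, 99.0],\n",
"                    'TFT':   [ 97.0, 103.0, 98.5]})\n",
"models = ['NHITS', 'TFT']\n",
"err = pd.DataFrame({m: (toy['y'] - toy[m]).abs() / toy['y'] for m in models})\n",
"winner_per_row = err.idxmin(axis=1)            # best model per row\n",
"print(winner_per_row.value_counts().idxmax())  # 'NHITS' on this toy data\n"
]
},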
{
"cell_type": "code",
"execution_count": null,
"id": "ade7026e-8cf2-405f-a2da-9e90f364adab",
"metadata": {},
"outputs": [],
"source": [
"if is_corr:\n",
" df = corr_feature(df=df)\n",
"\n",
"df1 = df.copy() # 备份一下后面特征筛选完之后加入ds y 列用\n",
"logger.info(f\"开始训练模型...\")\n",
"row,col = df.shape\n"
]
},
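{
"cell_type": "code",
"execution_count": null,
"id": "0a1b2c3d-0004-4abc-8abc-000000000004",
"metadata": {},
"outputs": [],
"source": [
"# corr_feature is a project helper whose implementation is not shown in this notebook. As a\n",
"# rough, hypothetical sketch of correlation-based feature screening (function name, threshold\n",
"# and behaviour are assumptions, not the project's actual logic): keep only numeric features\n",
"# whose absolute Pearson correlation with y clears a threshold.\n",
"import pandas as pd\n",
"\n",
"def corr_filter(df, target='y', threshold=0.1):\n",
"    num = df.select_dtypes('number')\n",
"    corr = num.corr()[target].abs()\n",
"    keep = corr[corr >= threshold].index.tolist()\n",
"    # retain non-numeric columns such as ds alongside the surviving features\n",
"    other = [c for c in df.columns if c not in num.columns]\n",
"    return df[other + keep]\n"
]
},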
{
"cell_type": "code",
"execution_count": null,
"id": "dfef57d8-36da-423b-bbe7-05a13e15f71b",
"metadata": {},
"outputs": [],
"source": [
"now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')\n",
"# ex_Model(df,\n",
"# horizon=horizon,\n",
"# input_size=input_size,\n",
"# train_steps=train_steps,\n",
"# val_check_steps=val_check_steps,\n",
"# early_stop_patience_steps=early_stop_patience_steps,\n",
"# is_debug=is_debug,\n",
"# dataset=dataset,\n",
"# is_train=is_train,\n",
"# is_fivemodels=is_fivemodels,\n",
"# val_size=val_size,\n",
"# test_size=test_size,\n",
"# settings=settings,\n",
"# now=now,\n",
"# etadata = etadata,\n",
"# modelsindex = modelsindex,\n",
"# data = data,\n",
"# is_eta=is_eta,\n",
"# )\n"
]
},
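{
"cell_type": "code",
"execution_count": null,
"id": "0a1b2c3d-0005-4abc-8abc-000000000005",
"metadata": {},
"outputs": [],
"source": [
"# A small sketch of the fold-selection trick used in the next cell: cross_validation.csv holds\n",
"# one row per (ds, cutoff) pair, and groupby('ds')['cutoff'].transform('max') keeps, for every\n",
"# date, only the prediction from the most recent fold. Toy data for illustration.\n",
"import pandas as pd\n",
"\n",
"cv = pd.DataFrame({'ds':     ['d1', 'd1', 'd2', 'd2'],\n",
"                   'cutoff': [  1,    2,    1,    2],\n",
"                   'pred':   [9.0,  9.5, 10.0, 10.5]})\n",
"cv['max_cutoff'] = cv.groupby('ds')['cutoff'].transform('max')\n",
"latest = cv[cv['cutoff'] == cv['max_cutoff']].drop(columns=['cutoff', 'max_cutoff'])\n",
"print(latest)  # one row per ds, taken from cutoff 2\n"
]
},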
{
"cell_type": "code",
"execution_count": null,
"id": "0e5b6f30-b7ca-4718-97a3-48b54156e07f",
"metadata": {},
"outputs": [],
"source": [
"logger.info('模型训练完成')\n",
"# # 模型评估\n",
"\n",
"pd.set_option('display.max_columns', 100)\n",
"\n",
"# 计算预测评估指数\n",
"def model_losss_juxiting(sqlitedb):\n",
" global dataset\n",
" most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]]\n",
" most_model_name = most_model[0]\n",
"\n",
" # 预测数据处理 predict\n",
" df_combined = loadcsv(os.path.join(dataset,\"cross_validation.csv\")) \n",
" df_combined = dateConvert(df_combined)\n",
" # 删除空列\n",
" df_combined.dropna(axis=1,inplace=True)\n",
" # 删除缺失值,预测过程不能有缺失值\n",
" df_combined.dropna(inplace=True) \n",
" # 其他列转为数值类型\n",
" df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['cutoff','ds'] })\n",
" # 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值\n",
" df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('max')\n",
"\n",
" # 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列\n",
" df_combined = df_combined[df_combined['cutoff'] == df_combined['max_cutoff']]\n",
" # 删除模型生成的cutoff列\n",
" df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)\n",
" # 获取模型名称\n",
" modelnames = df_combined.columns.to_list()[1:] \n",
" if 'y' in modelnames:\n",
" modelnames.remove('y')\n",
" df_combined3 = df_combined.copy() # 备份df_combined,后面画图需要\n",
"\n",
"\n",
" # 空的列表存储每个模型的MSE、RMSE、MAE、MAPE、SMAPE\n",
" cellText = []\n",
"\n",
" # 遍历模型名称,计算模型评估指标 \n",
" for model in modelnames:\n",
" modelmse = mse(df_combined['y'], df_combined[model])\n",
" modelrmse = rmse(df_combined['y'], df_combined[model])\n",
" modelmae = mae(df_combined['y'], df_combined[model])\n",
" # modelmape = mape(df_combined['y'], df_combined[model])\n",
" # modelsmape = smape(df_combined['y'], df_combined[model])\n",
" # modelr2 = r2_score(df_combined['y'], df_combined[model])\n",
" cellText.append([model,round(modelmse, 3), round(modelrmse, 3), round(modelmae, 3)])\n",
" \n",
" model_results3 = pd.DataFrame(cellText,columns=['模型(Model)','平均平方误差(MSE)', '均方根误差(RMSE)', '平均绝对误差(MAE)'])\n",
" # 按MSE降序排列\n",
" model_results3 = model_results3.sort_values(by='平均平方误差(MSE)', ascending=True)\n",
" model_results3.to_csv(os.path.join(dataset,\"model_evaluation.csv\"),index=False)\n",
" modelnames = model_results3['模型(Model)'].tolist()\n",
" allmodelnames = modelnames.copy()\n",
" # 保存5个最佳模型的名称\n",
" if len(modelnames) > 5:\n",
" modelnames = modelnames[0:5]\n",
" with open(os.path.join(dataset,\"best_modelnames.txt\"), 'w') as f:\n",
" f.write(','.join(modelnames) + '\\n')\n",
"\n",
" # 预测值与真实值对比图\n",
" plt.rcParams['font.sans-serif'] = ['SimHei']\n",
" plt.figure(figsize=(15, 10))\n",
" for n,model in enumerate(modelnames[:5]):\n",
" plt.subplot(3, 2, n+1)\n",
" plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')\n",
" plt.plot(df_combined3['ds'], df_combined3[model], label=model)\n",
" plt.legend()\n",
" plt.xlabel('日期')\n",
" plt.ylabel('价格')\n",
" plt.title(model+'拟合')\n",
" plt.subplots_adjust(hspace=0.5)\n",
" plt.savefig(os.path.join(dataset,'预测值与真实值对比图.png'), bbox_inches='tight')\n",
" plt.close()\n",
" \n",
"\n",
" # # 根据最佳模型的绝对误差的平均比例,最佳模型乘平均比例的百分比,确定最大最小值\n",
" import heapq # 使用堆来找到最大和最小的值\n",
" def find_min_max_within_quantile(row):\n",
" true_value = row['y']\n",
" row.drop(['ds','y'], inplace=True)\n",
" row = row.astype(float).round(2)\n",
"\n",
" max_heap = []\n",
" min_heap = []\n",
" for col in row.index:\n",
" # 对比真实值进行分类\n",
" if row[col] < true_value:\n",
" heapq.heappush(min_heap, row[col])\n",
" elif row[col] > true_value:\n",
" heapq.heappush(max_heap, -row[col]) # 使用负号来实现最大堆\n",
"\n",
" if len(max_heap) == 1:\n",
" max_y = max_heap[0]\n",
" elif len(max_heap) == 0:\n",
" max_y = -min_heap[-1]\n",
" else:\n",
" max_y = heapq.nsmallest(2, max_heap)[1]\n",
"\n",
" if len(min_heap) < 2 :\n",
" min_y = -max_heap[-1]\n",
" else:\n",
" min_y = heapq.nsmallest(2, min_heap)[-1]\n",
"\n",
"\n",
" # 获取最大和最小的值\n",
" q10 = min_y \n",
" q90 = -max_y\n",
"\n",
" # 获取最大和最小的模型名称\n",
" min_model = row[row == q10].idxmin()\n",
" max_model = row[row == q90].idxmax()\n",
"\n",
" # 设置上下界比例\n",
" rote = 1\n",
"\n",
" q10 = q10 * rote\n",
" q90 = q90 * rote\n",
"\n",
" logger.info(min_model,q10,max_model,q90)\n",
"\n",
" return pd.Series([q10, q90, min_model, max_model], index=['min_within_quantile', 'max_within_quantile', 'min_model', 'max_model'])\n",
" # # 遍历行\n",
" df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n",
" df_combined = df_combined.round(4)\n",
" print(df_combined3)\n",
"\n",
"\n",
" '''\n",
" # # 根据真实值y确定最大最小值,去掉最高最低的预测值\n",
" import heapq # 使用堆来找到最大和最小的值\n",
" def find_min_max_within_quantile(row):\n",
" true_value = row['y']\n",
" row.drop(['ds','y'], inplace=True)\n",
" row = row.astype(float).round(2)\n",
"\n",
" max_heap = []\n",
" min_heap = []\n",
" for col in row.index:\n",
" # 对比真实值进行分类\n",
" if row[col] < true_value:\n",
" heapq.heappush(min_heap, row[col])\n",
" elif row[col] > true_value:\n",
" heapq.heappush(max_heap, -row[col]) # 使用负号来实现最大堆\n",
"\n",
" if len(max_heap) == 1:\n",
" max_y = max_heap[0]\n",
" elif len(max_heap) == 0:\n",
" max_y = -min_heap[-1]\n",
" else:\n",
" max_y = heapq.nsmallest(2, max_heap)[1]\n",
"\n",
" if len(min_heap) < 2 :\n",
" min_y = -max_heap[-1]\n",
" else:\n",
" min_y = heapq.nsmallest(2, min_heap)[-1]\n",
"\n",
"\n",
" # 获取最大和最小的值\n",
" q10 = min_y \n",
" q90 = -max_y\n",
"\n",
" # 获取最大和最小的模型名称\n",
" min_model = row[row == q10].idxmin()\n",
" max_model = row[row == q90].idxmax()\n",
"\n",
" # 设置上下界比例\n",
" rote = 1\n",
"\n",
" q10 = q10 * rote\n",
" q90 = q90 * rote\n",
"\n",
" logger.info(min_model,q10,max_model,q90)\n",
"\n",
" return pd.Series([q10, q90, min_model, max_model], index=['min_within_quantile', 'max_within_quantile', 'min_model', 'max_model'])\n",
" # # 遍历行\n",
" df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n",
" df_combined = df_combined.round(4)\n",
" print(df_combined3)\n",
" '''\n",
"\n",
" '''\n",
" #使用最佳五个模型进行绘图\n",
" best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()\n",
" def find_min_max_within_quantile(row):\n",
" row = row[best_models]\n",
" q10 = row.min()\n",
" q90 = row.max()\n",
" # 获取 row行最大最小值模型名称\n",
" min_model = row[row == q10].idxmin()\n",
" max_model = row[row == q90].idxmin()\n",
" \n",
" # # 判断flot值是否为空值\n",
" # if pd.isna(q10) or pd.isna(q90):\n",
" return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])\n",
"\n",
" # 遍历行\n",
" df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n",
" df_combined = df_combined.round(4)\n",
" print(df_combined3)\n",
" '''\n",
" \n",
" '''\n",
" # 通道使用模型评估前80%作为置信度\n",
" def find_min_max_within_quantile(row):\n",
" row.drop(['ds','y'], inplace=True)\n",
" row = row.astype(float).round(2)\n",
"\n",
" row_sorted = row\n",
" # 计算 10% 和 90% 位置的索引\n",
" index_10 = 0\n",
" index_90 = int(len(row_sorted) * 0.8)\n",
" q10 = row_sorted[index_10]\n",
" q90 = row_sorted[index_90]\n",
" # 获取模型名称\n",
" min_model = row[row == q10].idxmin()\n",
" max_model = row[row == q90].idxmin()\n",
"\n",
" \n",
" # # 判断flot值是否为空值\n",
" # if pd.isna(q10) or pd.isna(q90):\n",
" return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])\n",
"\n",
" # 重新排列\n",
" df_combined3 = df_combined3[['ds','y'] + allmodelnames]\n",
" # 遍历行\n",
" df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n",
" df_combined = df_combined.round(4)\n",
" print(df_combined3)\n",
"\n",
" '''\n",
" \n",
" '''\n",
" # 通道使用预测模型的80%置信度\n",
" def find_min_max_within_quantile(row):\n",
" row.drop(['ds','y'], inplace=True)\n",
" row = row.astype(float).round(2)\n",
"\n",
" row_sorted = row.sort_values(ascending=True).reset_index(drop=True)\n",
" # 计算 10% 和 90% 位置的索引\n",
" index_10 = int(len(row_sorted) * 0.1)\n",
" index_90 = int(len(row_sorted) * 0.9)\n",
" q10 = row_sorted[index_10]\n",
" q90 = row_sorted[index_90]\n",
" # 获取模型名称\n",
" min_model = row[row == q10].idxmin()\n",
" max_model = row[row == q90].idxmin()\n",
"\n",
" \n",
" # # 判断flot值是否为空值\n",
" # if pd.isna(q10) or pd.isna(q90):\n",
" return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])\n",
"\n",
" # 遍历行\n",
" df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n",
" df_combined = df_combined.round(4)\n",
" print(df_combined3)\n",
" '''\n",
" \n",
"\n",
" '''\n",
" # 计算波动率,取近60日波动率的10%和90%分位数确定通道上下界\n",
" df_combined3['volatility'] = df_combined3['y'].pct_change().round(4)\n",
" # 计算近60日的波动率 10% 90%分位数\n",
" df_combined3['quantile_10'] = df_combined3['volatility'].rolling(60).quantile(0.1)\n",
" df_combined3['quantile_90'] = df_combined3['volatility'].rolling(60).quantile(0.9)\n",
" df_combined3 = df_combined3.round(4)\n",
" # 计算分位数对应的价格\n",
" df_combined3['quantile_10_price'] = df_combined3['y'] * (1 + df_combined3['quantile_10'])\n",
" df_combined3['quantile_90_price'] = df_combined3['y'] * (1 + df_combined3['quantile_90'])\n",
"\n",
" # 遍历行\n",
" def find_min_max_within_quantile(row):\n",
" # 获取分位数10%和90%的值\n",
" q10 = row['quantile_10_price']\n",
" q90 = row['quantile_90_price']\n",
" \n",
" # 判断flot值是否为空值\n",
" if pd.isna(q10) or pd.isna(q90):\n",
" return pd.Series([None, None, None, None], index=['min_within_quantile','max_within_quantile','min_model','max_model'])\n",
" \n",
" # 初始化最小和最大值为None\n",
" min_value = None\n",
" max_value = None\n",
" min_value_model = ''\n",
" max_value_model = ''\n",
"\n",
" # 遍历指定列,找出在分位数范围内的最大最小值\n",
" for model in modelnames:\n",
" value = row[model]\n",
" if value >= q10 and value <= q90:\n",
" if min_value is None or value < min_value:\n",
" min_value = value\n",
" min_value_model = model\n",
"\n",
" if max_value is None or value > max_value:\n",
" max_value = value\n",
" max_value_model = model\n",
" \n",
" # 返回最大最小值\n",
" return pd.Series([min_value, max_value,min_value_model,max_value_model], index=['min_within_quantile', 'max_within_quantile','min_model','max_model'])\n",
"\n",
" # 应用函数到每一行\n",
" df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n",
"\n",
" \n",
" '''\n",
" \n",
" # 去除有空值的行\n",
" df_combined3.dropna(inplace=True)\n",
" # 保存到数据库\n",
" df_combined3.to_sql('testandpredict_groupby', sqlitedb.connection, if_exists='replace', index=False)\n",
" df_combined3.to_csv(os.path.join(dataset,\"testandpredict_groupby.csv\"),index=False)\n",
"\n",
" '''\n",
" # 去掉方差最大的模型,其余模型预测最大最小值确定通道边界\n",
" \n",
" \n",
" # 历史数据+预测数据\n",
" # 拼接未来时间预测\n",
" df_predict = loadcsv(os.path.join(dataset,'predict.csv'))\n",
" df_predict.drop('unique_id',inplace=True,axis=1)\n",
" df_predict.dropna(axis=1,inplace=True)\n",
" df_predict2 = df_predict.copy()\n",
" try:\n",
" df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')\n",
" except ValueError :\n",
" df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')\n",
"\n",
" # 取第一行数据存储到数据库中\n",
" first_row = df_predict.head(1)\n",
" first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00')\n",
"\n",
" # # 将预测结果保存到数据库\n",
" df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)\n",
" # # 判断 df 的数值列转为float\n",
" for col in df_combined3.columns:\n",
" try:\n",
" if col != 'ds':\n",
" df_combined3[col] = df_combined3[col].astype(float)\n",
" df_combined3[col] = df_combined3[col].round(2)\n",
" except ValueError:\n",
" pass\n",
" df_combined3.to_csv(os.path.join(dataset,\"testandpredict_groupby.csv\"),index=False)\n",
" df_combined3['ds'] = df_combined3['ds'].dt.strftime('%Y-%m-%d 00:00:00')\n",
" # # 判断表存在\n",
" if not sqlitedb.check_table_exists('testandpredict_groupby'):\n",
" df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)\n",
" else:\n",
" for row in df_combined3.itertuples(index=False):\n",
" row_dict = row._asdict()\n",
" check_query = sqlitedb.select_data('testandpredict_groupby',where_condition = f\"ds = '{row.ds}'\")\n",
" if len(check_query) > 0:\n",
" set_clause = \", \".join([f\"{key} = '{value}'\" for key, value in row_dict.items()])\n",
" sqlitedb.update_data('testandpredict_groupby',set_clause,where_condition = f\"ds = '{row.ds}'\")\n",
" continue\n",
" sqlitedb.insert_data('testandpredict_groupby',tuple(row_dict.values()),columns=row_dict.keys())\n",
"\n",
" ten_models = allmodelnames\n",
" # 计算每个模型的方差\n",
" variances = df_combined3[ten_models].var()\n",
" # 找到方差最大的模型\n",
" max_variance_model = variances.idxmax()\n",
" # 打印方差最大的模型\n",
" print(\"方差最大的模型是:\", max_variance_model)\n",
" # 去掉方差最大的模型\n",
" df_combined3 = df_combined3.drop(columns=[max_variance_model])\n",
" if max_variance_model in allmodelnames:\n",
" allmodelnames.remove(max_variance_model)\n",
" df_combined3['min'] = df_combined3[allmodelnames].min(axis=1)\n",
" df_combined3['max'] = df_combined3[allmodelnames].max(axis=1)\n",
" print(df_combined3[['min','max']])\n",
" # 历史价格+预测价格\n",
" df_combined3 = df_combined3[-50:] # 取50个数据点画图\n",
" plt.figure(figsize=(20, 10))\n",
" plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值',marker='o')\n",
" plt.plot(df_combined3['ds'], df_combined3[most_model], label=most_model_name)\n",
" plt.fill_between(df_combined3['ds'], df_combined3['min'], df_combined3['max'], alpha=0.2)\n",
" plt.grid(True)\n",
" # # 显示历史值\n",
" for i, j in zip(df_combined3['ds'][:-5], df_combined3['y'][:-5]):\n",
" plt.text(i, j, str(j), ha='center', va='bottom')\n",
" # 当前日期画竖虚线\n",
" plt.axvline(x=df_combined3['ds'].iloc[-horizon], color='r', linestyle='--')\n",
" plt.legend()\n",
" plt.xlabel('日期')\n",
" plt.ylabel('价格')\n",
"\n",
" plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')\n",
" plt.close()\n",
" '''\n",
"\n",
" # # 历史数据+预测数据\n",
" # # 拼接未来时间预测\n",
" df_predict = pd.read_csv(os.path.join(dataset,'predict.csv'))\n",
" df_predict.drop('unique_id',inplace=True,axis=1)\n",
" df_predict.dropna(axis=1,inplace=True)\n",
"\n",
" try:\n",
" df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')\n",
" except ValueError :\n",
" df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')\n",
"\n",
" def first_row_to_database(df):\n",
" # # 取第一行数据存储到数据库中\n",
" first_row = df.head(1)\n",
" first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00')\n",
" # 将预测结果保存到数据库\n",
" if not sqlitedb.check_table_exists('trueandpredict'):\n",
" first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)\n",
" else:\n",
" for col in first_row.columns:\n",
" sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')\n",
" for row in first_row.itertuples(index=False):\n",
" row_dict = row._asdict()\n",
" columns=row_dict.keys()\n",
" check_query = sqlitedb.select_data('trueandpredict',where_condition = f\"ds = '{row.ds}'\")\n",
" if len(check_query) > 0:\n",
" set_clause = \", \".join([f\"{key} = '{value}'\" for key, value in row_dict.items()])\n",
" sqlitedb.update_data('trueandpredict',set_clause,where_condition = f\"ds = '{row.ds}'\")\n",
" continue\n",
" sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=columns)\n",
"\n",
" first_row_to_database(df_predict)\n",
" def find_most_common_model():\n",
" # 最多频率的模型名称\n",
" min_model_max_frequency_model = df_combined3['min_model'].tail(20).value_counts().idxmax()\n",
" max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().idxmax()\n",
" if min_model_max_frequency_model == max_model_max_frequency_model:\n",
" # 取20天第二多的模型\n",
" max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().nlargest(2).index[1]\n",
"\n",
" df_predict['min_model'] = min_model_max_frequency_model\n",
" df_predict['max_model'] = max_model_max_frequency_model\n",
" df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]\n",
" df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]\n",
"\n",
"\n",
" # find_most_common_model()\n",
"\n",
" df_predict2 = df_predict.copy()\n",
" df_predict2['ds'] = pd.to_datetime(df_predict2['ds'])\n",
" df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00')\n",
"\n",
"\n",
" def _add_abs_error_rate():\n",
" # 计算每个预测值与真实值之间的偏差率\n",
" for model in allmodelnames:\n",
" df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']\n",
"\n",
" # 获取每行对应的最小偏差率值\n",
" min_abs_error_rate_values = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].min(), axis=1)\n",
" # 获取每行对应的最小偏差率值对应的列名\n",
" min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].idxmin(), axis=1) \n",
" # 将列名索引转换为列名\n",
" min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])\n",
" # 获取最小偏差率对应的模型的预测值\n",
" min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1)\n",
" # 将最小偏差率对应的模型的预测值添加到DataFrame中\n",
" df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions\n",
" df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name\n",
"\n",
" _add_abs_error_rate()\n",
"\n",
" df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)\n",
" # 判断 df 的数值列转为float\n",
" for col in df_combined3.columns:\n",
" try:\n",
" if col != 'ds':\n",
" df_combined3[col] = df_combined3[col].astype(float)\n",
" df_combined3[col] = df_combined3[col].round(2)\n",
" except ValueError:\n",
" pass\n",
" df_combined3.to_csv(os.path.join(dataset,\"df_combined3.csv\"),index=False) \n",
" \n",
" # 历史价格+预测价格\n",
" # 将预测结果保存到数据库\n",
" # 判断表存在\n",
" # if not sqlitedb.check_table_exists('testandpredict_groupby'):\n",
" # df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)\n",
" # else:\n",
" # for row in df_combined3.itertuples(index=False):\n",
" # row_dict = row._asdict()\n",
" # check_query = sqlitedb.select_data('testandpredict_groupby',where_condition = f\"ds = '{row.ds}'\")\n",
" # if len(check_query) > 0:\n",
" # set_clause = \", \".join([f\"{key} = '{value}'\" for key, value in row_dict.items()])\n",
" # sqlitedb.update_data('testandpredict_groupby',set_clause,where_condition = f\"ds = '{row.ds}'\")\n",
" # continue\n",
" # sqlitedb.insert_data('testandpredict_groupby',tuple(row_dict.values()),columns=row_dict.keys())\n",
" \n",
" \n",
" def _plt_predict_ture(df):\n",
" df = df[-50:] # 取50个数据点画图\n",
" # 历史价格\n",
" plt.figure(figsize=(20, 10))\n",
" plt.plot(df['ds'], df['y'], label='真实值')\n",
" # 颜色填充\n",
" plt.fill_between(df['ds'], df['min_within_quantile'], df['max_within_quantile'], alpha=0.2)\n",
" # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')\n",
" # 网格\n",
" plt.grid(True)\n",
" # 显示历史值\n",
" for i, j in zip(df['ds'], df['y']):\n",
" plt.text(i, j, str(j), ha='center', va='bottom')\n",
"\n",
" for model in most_model:\n",
" plt.plot(df['ds'], df[model], label=model,marker='o')\n",
" # 当前日期画竖虚线\n",
" plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')\n",
" plt.legend()\n",
" plt.xlabel('日期')\n",
" plt.ylabel('价格')\n",
" \n",
" plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')\n",
" plt.close()\n",
"\n",
" def _plt_predict_table(df): \n",
" # 预测值表格\n",
" fig, ax = plt.subplots(figsize=(20, 6))\n",
" ax.axis('off') # 关闭坐标轴\n",
" # 数值保留2位小数\n",
" df = df.round(2)\n",
" df = df[-horizon:]\n",
" df['Day'] = [f'Day_{i}' for i in range(1,horizon+1)]\n",
" # Day列放到最前面\n",
" df = df[['Day'] + list(df.columns[:-1])]\n",
" table = ax.table(cellText=df.values, colLabels=df.columns, loc='center')\n",
" #加宽表格\n",
" table.auto_set_font_size(False)\n",
" table.set_fontsize(10)\n",
"\n",
" # 设置表格样式,列数据最小的用绿色标识\n",
" plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight')\n",
" plt.close()\n",
" \n",
" def _plt_model_results3():\n",
" # 可视化评估结果\n",
" plt.rcParams['font.sans-serif'] = ['SimHei']\n",
" fig, ax = plt.subplots(figsize=(20, 10))\n",
" ax.axis('off') # 关闭坐标轴\n",
" table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center')\n",
" # 加宽表格\n",
" table.auto_set_font_size(False)\n",
" table.set_fontsize(10)\n",
"\n",
" # 设置表格样式,列数据最小的用绿色标识\n",
" plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight')\n",
" plt.close()\n",
"\n",
" _plt_predict_ture(df_combined3)\n",
" _plt_predict_table(df_combined3)\n",
" _plt_model_results3()\n",
"\n",
" return model_results3\n",
"\n",
"\n",
"\n",
"logger.info('训练数据绘图ing')\n",
"model_results3 = model_losss_juxiting(sqlitedb)\n",
"\n",
"logger.info('训练数据绘图end')\n",
"\n"
]
},
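{
"cell_type": "code",
"execution_count": null,
"id": "0a1b2c3d-0006-4abc-8abc-000000000006",
"metadata": {},
"outputs": [],
"source": [
"# A standalone sketch of the channel rule in find_min_max_within_quantile above: predictions\n",
"# are split into those below and above the true value, and the single most extreme prediction\n",
"# on each side is discarded before taking the bounds. Toy numbers only.\n",
"y_true = 100.0\n",
"preds = {'m1': 95.0, 'm2': 99.0, 'm3': 101.0, 'm4': 104.0, 'm5': 110.0}\n",
"\n",
"below = sorted(v for v in preds.values() if v < y_true)  # [95.0, 99.0]\n",
"above = sorted(v for v in preds.values() if v > y_true)  # [101.0, 104.0, 110.0]\n",
"\n",
"# drop the single most extreme value on each side when there are at least two;\n",
"# otherwise fall back to the other side, as the heap version does\n",
"lower = below[1] if len(below) >= 2 else (below[0] if below else min(above))\n",
"upper = above[-2] if len(above) >= 2 else (above[-1] if above else max(below))\n",
"\n",
"print(lower, upper)  # 99.0 104.0\n"
]
},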
{
"cell_type": "code",
"execution_count": null,
"id": "85b557de-8235-4e27-b5b8-58b36dfe6724",
"metadata": {},
"outputs": [],
"source": [
"# 模型报告\n",
"\n",
"logger.info('制作报告ing')\n",
"title = f'{settings}--{now}-预测报告' # 报告标题\n",
"\n",
"pp_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,\n",
" reportname=reportname,sqlitedb=sqlitedb),\n",
"\n",
"logger.info('制作报告end')\n",
"logger.info('模型训练完成')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4129e71-ee2c-4af1-81ed-fadf14efa206",
"metadata": {},
"outputs": [],
"source": [
"# 发送邮件\n",
"m = SendMail(\n",
" username=username,\n",
" passwd=passwd,\n",
" recv=recv,\n",
" title=title,\n",
" content=content,\n",
" file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),\n",
" ssl=ssl,\n",
")\n",
"# m.send_mail() \n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}