{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "7fadc60c-d710-4b8c-89cd-1d889ece1eaf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From C:\\Users\\EDY\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "从eta获取数据...\n", "从eta获取数据...\n" ] }, { "ename": "Exception", "evalue": "(\"请求失败,请确认是否为内网环境: HTTPConnectionPool(host='10.189.2.78', port=8108): Max retries exceeded with url: /v1/edb/classify/list?ClassifyType= (Caused by ConnectTimeoutError(, 'Connection to 10.189.2.78 timed out. (connect timeout=None)'))\", '\\x1b[0m')", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTimeoutError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connection.py:203\u001b[0m, in \u001b[0;36mHTTPConnection._new_conn\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 202\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 203\u001b[0m sock \u001b[38;5;241m=\u001b[39m connection\u001b[38;5;241m.\u001b[39mcreate_connection(\n\u001b[0;32m 204\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dns_host, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mport),\n\u001b[0;32m 205\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtimeout,\n\u001b[0;32m 206\u001b[0m source_address\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msource_address,\n\u001b[0;32m 207\u001b[0m socket_options\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msocket_options,\n\u001b[0;32m 208\u001b[0m )\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m socket\u001b[38;5;241m.\u001b[39mgaierror \u001b[38;5;28;01mas\u001b[39;00m e:\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\util\\connection.py:85\u001b[0m, in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address, socket_options)\u001b[0m\n\u001b[0;32m 84\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 85\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n\u001b[0;32m 86\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 87\u001b[0m \u001b[38;5;66;03m# Break explicitly a reference cycle\u001b[39;00m\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\util\\connection.py:73\u001b[0m, in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address, socket_options)\u001b[0m\n\u001b[0;32m 72\u001b[0m sock\u001b[38;5;241m.\u001b[39mbind(source_address)\n\u001b[1;32m---> 73\u001b[0m sock\u001b[38;5;241m.\u001b[39mconnect(sa)\n\u001b[0;32m 74\u001b[0m \u001b[38;5;66;03m# Break explicitly a reference cycle\u001b[39;00m\n", "\u001b[1;31mTimeoutError\u001b[0m: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[1;31mConnectTimeoutError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connectionpool.py:791\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[0;32m 790\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[1;32m--> 791\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_request(\n\u001b[0;32m 792\u001b[0m conn,\n\u001b[0;32m 793\u001b[0m method,\n\u001b[0;32m 794\u001b[0m url,\n\u001b[0;32m 795\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout_obj,\n\u001b[0;32m 796\u001b[0m body\u001b[38;5;241m=\u001b[39mbody,\n\u001b[0;32m 797\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 798\u001b[0m chunked\u001b[38;5;241m=\u001b[39mchunked,\n\u001b[0;32m 799\u001b[0m retries\u001b[38;5;241m=\u001b[39mretries,\n\u001b[0;32m 800\u001b[0m response_conn\u001b[38;5;241m=\u001b[39mresponse_conn,\n\u001b[0;32m 801\u001b[0m preload_content\u001b[38;5;241m=\u001b[39mpreload_content,\n\u001b[0;32m 802\u001b[0m decode_content\u001b[38;5;241m=\u001b[39mdecode_content,\n\u001b[0;32m 803\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mresponse_kw,\n\u001b[0;32m 804\u001b[0m )\n\u001b[0;32m 806\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connectionpool.py:497\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[1;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[0;32m 496\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 497\u001b[0m conn\u001b[38;5;241m.\u001b[39mrequest(\n\u001b[0;32m 498\u001b[0m method,\n\u001b[0;32m 499\u001b[0m url,\n\u001b[0;32m 500\u001b[0m body\u001b[38;5;241m=\u001b[39mbody,\n\u001b[0;32m 501\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 502\u001b[0m chunked\u001b[38;5;241m=\u001b[39mchunked,\n\u001b[0;32m 503\u001b[0m preload_content\u001b[38;5;241m=\u001b[39mpreload_content,\n\u001b[0;32m 504\u001b[0m decode_content\u001b[38;5;241m=\u001b[39mdecode_content,\n\u001b[0;32m 505\u001b[0m enforce_content_length\u001b[38;5;241m=\u001b[39menforce_content_length,\n\u001b[0;32m 506\u001b[0m )\n\u001b[0;32m 508\u001b[0m \u001b[38;5;66;03m# We are swallowing BrokenPipeError (errno.EPIPE) since the server is\u001b[39;00m\n\u001b[0;32m 509\u001b[0m \u001b[38;5;66;03m# legitimately able to close the connection after sending a valid response.\u001b[39;00m\n\u001b[0;32m 510\u001b[0m \u001b[38;5;66;03m# With this behaviour, the received response is still readable.\u001b[39;00m\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connection.py:395\u001b[0m, in \u001b[0;36mHTTPConnection.request\u001b[1;34m(self, method, url, body, headers, chunked, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[0;32m 394\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mputheader(header, value)\n\u001b[1;32m--> 395\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mendheaders()\n\u001b[0;32m 397\u001b[0m \u001b[38;5;66;03m# If we're given a body we start sending that in chunks.\u001b[39;00m\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\http\\client.py:1289\u001b[0m, in \u001b[0;36mHTTPConnection.endheaders\u001b[1;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[0;32m 1288\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CannotSendHeader()\n\u001b[1;32m-> 1289\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_send_output(message_body, encode_chunked\u001b[38;5;241m=\u001b[39mencode_chunked)\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\http\\client.py:1048\u001b[0m, in \u001b[0;36mHTTPConnection._send_output\u001b[1;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[0;32m 1047\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_buffer[:]\n\u001b[1;32m-> 1048\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msend(msg)\n\u001b[0;32m 1050\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m message_body \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 1051\u001b[0m \n\u001b[0;32m 1052\u001b[0m \u001b[38;5;66;03m# create a consistent interface to message_body\u001b[39;00m\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\http\\client.py:986\u001b[0m, in \u001b[0;36mHTTPConnection.send\u001b[1;34m(self, data)\u001b[0m\n\u001b[0;32m 985\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mauto_open:\n\u001b[1;32m--> 986\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconnect()\n\u001b[0;32m 987\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connection.py:243\u001b[0m, in \u001b[0;36mHTTPConnection.connect\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 242\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconnect\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 243\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msock \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_new_conn()\n\u001b[0;32m 244\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tunnel_host:\n\u001b[0;32m 245\u001b[0m \u001b[38;5;66;03m# If we're tunneling it means we're connected to our proxy.\u001b[39;00m\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connection.py:212\u001b[0m, in \u001b[0;36mHTTPConnection._new_conn\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 211\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SocketTimeout \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m--> 212\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ConnectTimeoutError(\n\u001b[0;32m 213\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 214\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConnection to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhost\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m timed out. (connect timeout=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtimeout\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 215\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 217\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", "\u001b[1;31mConnectTimeoutError\u001b[0m: (, 'Connection to 10.189.2.78 timed out. (connect timeout=None)')", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[1;31mMaxRetryError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[0;32m 485\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 486\u001b[0m resp \u001b[38;5;241m=\u001b[39m conn\u001b[38;5;241m.\u001b[39murlopen(\n\u001b[0;32m 487\u001b[0m method\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mmethod,\n\u001b[0;32m 488\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 489\u001b[0m body\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mbody,\n\u001b[0;32m 490\u001b[0m headers\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mheaders,\n\u001b[0;32m 491\u001b[0m redirect\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 492\u001b[0m assert_same_host\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 493\u001b[0m preload_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 494\u001b[0m decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 495\u001b[0m retries\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_retries,\n\u001b[0;32m 496\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[0;32m 497\u001b[0m chunked\u001b[38;5;241m=\u001b[39mchunked,\n\u001b[0;32m 498\u001b[0m )\n\u001b[0;32m 500\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\connectionpool.py:845\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[0;32m 843\u001b[0m new_e \u001b[38;5;241m=\u001b[39m ProtocolError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConnection aborted.\u001b[39m\u001b[38;5;124m\"\u001b[39m, new_e)\n\u001b[1;32m--> 845\u001b[0m retries \u001b[38;5;241m=\u001b[39m retries\u001b[38;5;241m.\u001b[39mincrement(\n\u001b[0;32m 846\u001b[0m method, url, error\u001b[38;5;241m=\u001b[39mnew_e, _pool\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m, _stacktrace\u001b[38;5;241m=\u001b[39msys\u001b[38;5;241m.\u001b[39mexc_info()[\u001b[38;5;241m2\u001b[39m]\n\u001b[0;32m 847\u001b[0m )\n\u001b[0;32m 848\u001b[0m retries\u001b[38;5;241m.\u001b[39msleep()\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\urllib3\\util\\retry.py:515\u001b[0m, in \u001b[0;36mRetry.increment\u001b[1;34m(self, method, url, response, error, _pool, _stacktrace)\u001b[0m\n\u001b[0;32m 514\u001b[0m reason \u001b[38;5;241m=\u001b[39m error \u001b[38;5;129;01mor\u001b[39;00m ResponseError(cause)\n\u001b[1;32m--> 515\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m MaxRetryError(_pool, url, reason) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mreason\u001b[39;00m \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[0;32m 517\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIncremented Retry for (url=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m): \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, url, new_retry)\n", "\u001b[1;31mMaxRetryError\u001b[0m: HTTPConnectionPool(host='10.189.2.78', port=8108): Max retries exceeded with url: /v1/edb/classify/list?ClassifyType= (Caused by ConnectTimeoutError(, 'Connection to 10.189.2.78 timed out. (connect timeout=None)'))", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mConnectTimeout\u001b[0m Traceback (most recent call last)", "File \u001b[1;32md:\\code\\PriceForecast\\lib\\dataread.py:1273\u001b[0m, in \u001b[0;36mEtaReader.get_eta_api_pp_data\u001b[1;34m(self, data_set, dataset)\u001b[0m\n\u001b[0;32m 1271\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1272\u001b[0m \u001b[38;5;66;03m# 发送GET请求 获取指标分类列表\u001b[39;00m\n\u001b[1;32m-> 1273\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclassifylisturl, headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mheaders) \n\u001b[0;32m 1274\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mRequestException \u001b[38;5;28;01mas\u001b[39;00m e:\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\api.py:73\u001b[0m, in \u001b[0;36mget\u001b[1;34m(url, params, **kwargs)\u001b[0m\n\u001b[0;32m 63\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a GET request.\u001b[39;00m\n\u001b[0;32m 64\u001b[0m \n\u001b[0;32m 65\u001b[0m \u001b[38;5;124;03m:param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 70\u001b[0m \u001b[38;5;124;03m:rtype: requests.Response\u001b[39;00m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m request(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mget\u001b[39m\u001b[38;5;124m\"\u001b[39m, url, params\u001b[38;5;241m=\u001b[39mparams, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[1;34m(method, url, **kwargs)\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[1;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m session\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[1;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[0;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[1;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msend(prep, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39msend_kwargs)\n\u001b[0;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[1;34m(self, request, **kwargs)\u001b[0m\n\u001b[0;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[1;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m adapter\u001b[38;5;241m.\u001b[39msend(request, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n", "File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\requests\\adapters.py:507\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[0;32m 506\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e\u001b[38;5;241m.\u001b[39mreason, NewConnectionError):\n\u001b[1;32m--> 507\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ConnectTimeout(e, request\u001b[38;5;241m=\u001b[39mrequest)\n\u001b[0;32m 509\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e\u001b[38;5;241m.\u001b[39mreason, ResponseError):\n", "\u001b[1;31mConnectTimeout\u001b[0m: HTTPConnectionPool(host='10.189.2.78', port=8108): Max retries exceeded with url: /v1/edb/classify/list?ClassifyType= (Caused by ConnectTimeoutError(, 'Connection to 10.189.2.78 timed out. (connect timeout=None)'))", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mException\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[1], line 37\u001b[0m\n\u001b[0;32m 27\u001b[0m signature \u001b[38;5;241m=\u001b[39m BinanceAPI(APPID, SECRET)\n\u001b[0;32m 28\u001b[0m etadata \u001b[38;5;241m=\u001b[39m EtaReader(signature\u001b[38;5;241m=\u001b[39msignature,\n\u001b[0;32m 29\u001b[0m classifylisturl \u001b[38;5;241m=\u001b[39m classifylisturl,\n\u001b[0;32m 30\u001b[0m classifyidlisturl\u001b[38;5;241m=\u001b[39mclassifyidlisturl,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 35\u001b[0m edbbusinessurl\u001b[38;5;241m=\u001b[39medbbusinessurl,\n\u001b[0;32m 36\u001b[0m )\n\u001b[1;32m---> 37\u001b[0m df_zhibiaoshuju,df_zhibiaoliebiao \u001b[38;5;241m=\u001b[39m etadata\u001b[38;5;241m.\u001b[39mget_eta_api_pp_data(data_set\u001b[38;5;241m=\u001b[39mdata_set,dataset\u001b[38;5;241m=\u001b[39mdataset) \u001b[38;5;66;03m# 原始数据,未处理\u001b[39;00m\n\u001b[0;32m 39\u001b[0m \u001b[38;5;66;03m# 数据处理\u001b[39;00m\n\u001b[0;32m 40\u001b[0m df \u001b[38;5;241m=\u001b[39m datachuli_juxiting(df_zhibiaoshuju,df_zhibiaoliebiao,y \u001b[38;5;241m=\u001b[39m y,dataset\u001b[38;5;241m=\u001b[39mdataset,add_kdj\u001b[38;5;241m=\u001b[39madd_kdj,is_timefurture\u001b[38;5;241m=\u001b[39mis_timefurture,end_time\u001b[38;5;241m=\u001b[39mend_time) \n", "File \u001b[1;32md:\\code\\PriceForecast\\lib\\dataread.py:1275\u001b[0m, in \u001b[0;36mEtaReader.get_eta_api_pp_data\u001b[1;34m(self, data_set, dataset)\u001b[0m\n\u001b[0;32m 1273\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclassifylisturl, headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mheaders) \n\u001b[0;32m 1274\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mRequestException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m-> 1275\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m请求失败,请确认是否为内网环境: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[0m\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 1277\u001b[0m \u001b[38;5;66;03m# 检查响应状态码 \u001b[39;00m\n\u001b[0;32m 1278\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m200\u001b[39m: \n\u001b[0;32m 1279\u001b[0m \u001b[38;5;66;03m# 获取成功, 处理响应内容\u001b[39;00m\n", "\u001b[1;31mException\u001b[0m: (\"请求失败,请确认是否为内网环境: HTTPConnectionPool(host='10.189.2.78', port=8108): Max retries exceeded with url: /v1/edb/classify/list?ClassifyType= (Caused by ConnectTimeoutError(, 'Connection to 10.189.2.78 timed out. (connect timeout=None)'))\", '\\x1b[0m')" ] } ], "source": [ "# 读取配置\n", "from lib.dataread import *\n", "from lib.tools import *\n", "from models.nerulforcastmodels import ex_Model,model_losss,brent_export_pdf,tansuanli_export_pdf,pp_export_pdf,model_losss_juxiting\n", "\n", "import glob\n", "import torch\n", "torch.set_float32_matmul_precision(\"high\")\n", "\n", "sqlitedb = SQLiteHandler(db_name) \n", "sqlitedb.connect()\n", "\n", "signature = BinanceAPI(APPID, SECRET)\n", "etadata = EtaReader(signature=signature,\n", " classifylisturl = classifylisturl,\n", " classifyidlisturl=classifyidlisturl,\n", " edbcodedataurl=edbcodedataurl,\n", " edbcodelist=edbcodelist,\n", " edbdatapushurl=edbdatapushurl,\n", " edbdeleteurl=edbdeleteurl,\n", " edbbusinessurl=edbbusinessurl\n", " )\n", "# 获取数据\n", "if is_eta:\n", " # eta数据\n", " logger.info('从eta获取数据...')\n", " signature = BinanceAPI(APPID, SECRET)\n", " etadata = EtaReader(signature=signature,\n", " classifylisturl = classifylisturl,\n", " classifyidlisturl=classifyidlisturl,\n", " edbcodedataurl=edbcodedataurl,\n", " edbcodelist=edbcodelist,\n", " edbdatapushurl=edbdatapushurl,\n", " edbdeleteurl=edbdeleteurl,\n", " edbbusinessurl=edbbusinessurl,\n", " )\n", " df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set,dataset=dataset) # 原始数据,未处理\n", "\n", " # 数据处理\n", " df = datachuli_juxiting(df_zhibiaoshuju,df_zhibiaoliebiao,y = y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time) \n", "\n", "else:\n", " logger.info('读取本地数据:'+os.path.join(dataset,data_set))\n", " df = getdata_juxiting(filename=os.path.join(dataset,data_set),y=y,dataset=dataset,add_kdj=add_kdj,is_timefurture=is_timefurture,end_time=end_time) # 原始数据,未处理\n", "\n", "# 更改预测列名称\n", "df.rename(columns={y:'y'},inplace=True)\n", " \n", "if is_edbnamelist:\n", " df = df[edbnamelist] \n", "df.to_csv(os.path.join(dataset,'指标数据.csv'), index=False)\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ae059224-976c-4839-b455-f81da7f25179", "metadata": {}, "outputs": [], "source": [ "# 保存最新日期的y值到数据库\n", "# 取第一行数据存储到数据库中\n", "first_row = df[['ds','y']].tail(1)\n", "# 将最新真实值保存到数据库\n", "if not sqlitedb.check_table_exists('trueandpredict'):\n", " first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)\n", "else:\n", " for row in first_row.itertuples(index=False):\n", " row_dict = row._asdict()\n", " row_dict['ds'] = row_dict['ds'].strftime('%Y-%m-%d %H:%M:%S')\n", " check_query = sqlitedb.select_data('trueandpredict',where_condition = f\"ds = '{row.ds}'\")\n", " if len(check_query) > 0:\n", " set_clause = \", \".join([f\"{key} = '{value}'\" for key, value in row_dict.items()])\n", " sqlitedb.update_data('trueandpredict',set_clause,where_condition = f\"ds = '{row.ds}'\")\n", " continue\n", " sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=row_dict.keys())\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "abb597fc-c5f3-4d76-8099-5eff358cb634", "metadata": {}, "outputs": [], "source": [ "import datetime\n", "# 判断当前日期是不是周一\n", "is_weekday = datetime.datetime.now().weekday() == 1\n", "if is_weekday:\n", " logger.info('今天是周一,更新预测模型')\n", " # 计算最近20天预测残差最低的模型名称\n", "\n", " model_results = sqlitedb.select_data('trueandpredict',order_by = \"ds DESC\",limit = \"20\")\n", " # 删除空值率为40%以上的列\n", " print(model_results.shape)\n", " model_results = model_results.dropna(thresh=len(model_results)*0.6,axis=1)\n", " model_results = model_results.dropna()\n", " print(model_results.shape)\n", " modelnames = model_results.columns.to_list()[2:] \n", " for col in model_results[modelnames].select_dtypes(include=['object']).columns:\n", " model_results[col] = model_results[col].astype(np.float32)\n", " # 计算每个预测值与真实值之间的偏差率\n", " for model in modelnames:\n", " model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']\n", "\n", " # 获取每行对应的最小偏差率值\n", " min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)\n", " # 获取每行对应的最小偏差率值对应的列名\n", " min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)\n", " print(min_abs_error_rate_column_name)\n", " # 将列名索引转换为列名\n", " min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])\n", " # 取出现次数最多的模型名称\n", " most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()\n", " logger.info(f\"最近20天预测残差最低的模型名称:{most_common_model}\")\n", "\n", " # 保存结果到数据库\n", " \n", " if not sqlitedb.check_table_exists('most_model'):\n", " sqlitedb.create_table('most_model',columns=\"ds datetime, most_common_model TEXT\")\n", " sqlitedb.insert_data('most_model',(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),most_common_model,),columns=('ds','most_common_model',))\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ade7026e-8cf2-405f-a2da-9e90f364adab", "metadata": {}, "outputs": [], "source": [ "if is_corr:\n", " df = corr_feature(df=df)\n", "\n", "df1 = df.copy() # 备份一下,后面特征筛选完之后加入ds y 列用\n", "logger.info(f\"开始训练模型...\")\n", "row,col = df.shape\n" ] }, { "cell_type": "code", "execution_count": null, "id": "dfef57d8-36da-423b-bbe7-05a13e15f71b", "metadata": {}, "outputs": [], "source": [ "now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')\n", "# ex_Model(df,\n", "# horizon=horizon,\n", "# input_size=input_size,\n", "# train_steps=train_steps,\n", "# val_check_steps=val_check_steps,\n", "# early_stop_patience_steps=early_stop_patience_steps,\n", "# is_debug=is_debug,\n", "# dataset=dataset,\n", "# is_train=is_train,\n", "# is_fivemodels=is_fivemodels,\n", "# val_size=val_size,\n", "# test_size=test_size,\n", "# settings=settings,\n", "# now=now,\n", "# etadata = etadata,\n", "# modelsindex = modelsindex,\n", "# data = data,\n", "# is_eta=is_eta,\n", "# )\n" ] }, { "cell_type": "code", "execution_count": null, "id": "0e5b6f30-b7ca-4718-97a3-48b54156e07f", "metadata": {}, "outputs": [], "source": [ "logger.info('模型训练完成')\n", "# # 模型评估\n", "\n", "pd.set_option('display.max_columns', 100)\n", "\n", "# 计算预测评估指数\n", "def model_losss_juxiting(sqlitedb):\n", " global dataset\n", " most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]]\n", " most_model_name = most_model[0]\n", "\n", " # 预测数据处理 predict\n", " df_combined = loadcsv(os.path.join(dataset,\"cross_validation.csv\")) \n", " df_combined = dateConvert(df_combined)\n", " # 删除空列\n", " df_combined.dropna(axis=1,inplace=True)\n", " # 删除缺失值,预测过程不能有缺失值\n", " df_combined.dropna(inplace=True) \n", " # 其他列转为数值类型\n", " df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['cutoff','ds'] })\n", " # 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值\n", " df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('max')\n", "\n", " # 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列\n", " df_combined = df_combined[df_combined['cutoff'] == df_combined['max_cutoff']]\n", " # 删除模型生成的cutoff列\n", " df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)\n", " # 获取模型名称\n", " modelnames = df_combined.columns.to_list()[1:] \n", " if 'y' in modelnames:\n", " modelnames.remove('y')\n", " df_combined3 = df_combined.copy() # 备份df_combined,后面画图需要\n", "\n", "\n", " # 空的列表存储每个模型的MSE、RMSE、MAE、MAPE、SMAPE\n", " cellText = []\n", "\n", " # 遍历模型名称,计算模型评估指标 \n", " for model in modelnames:\n", " modelmse = mse(df_combined['y'], df_combined[model])\n", " modelrmse = rmse(df_combined['y'], df_combined[model])\n", " modelmae = mae(df_combined['y'], df_combined[model])\n", " # modelmape = mape(df_combined['y'], df_combined[model])\n", " # modelsmape = smape(df_combined['y'], df_combined[model])\n", " # modelr2 = r2_score(df_combined['y'], df_combined[model])\n", " cellText.append([model,round(modelmse, 3), round(modelrmse, 3), round(modelmae, 3)])\n", " \n", " model_results3 = pd.DataFrame(cellText,columns=['模型(Model)','平均平方误差(MSE)', '均方根误差(RMSE)', '平均绝对误差(MAE)'])\n", " # 按MSE降序排列\n", " model_results3 = model_results3.sort_values(by='平均平方误差(MSE)', ascending=True)\n", " model_results3.to_csv(os.path.join(dataset,\"model_evaluation.csv\"),index=False)\n", " modelnames = model_results3['模型(Model)'].tolist()\n", " allmodelnames = modelnames.copy()\n", " # 保存5个最佳模型的名称\n", " if len(modelnames) > 5:\n", " modelnames = modelnames[0:5]\n", " with open(os.path.join(dataset,\"best_modelnames.txt\"), 'w') as f:\n", " f.write(','.join(modelnames) + '\\n')\n", "\n", " # 预测值与真实值对比图\n", " plt.rcParams['font.sans-serif'] = ['SimHei']\n", " plt.figure(figsize=(15, 10))\n", " for n,model in enumerate(modelnames[:5]):\n", " plt.subplot(3, 2, n+1)\n", " plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')\n", " plt.plot(df_combined3['ds'], df_combined3[model], label=model)\n", " plt.legend()\n", " plt.xlabel('日期')\n", " plt.ylabel('价格')\n", " plt.title(model+'拟合')\n", " plt.subplots_adjust(hspace=0.5)\n", " plt.savefig(os.path.join(dataset,'预测值与真实值对比图.png'), bbox_inches='tight')\n", " plt.close()\n", " \n", "\n", " # # 根据最佳模型的绝对误差的平均比例,最佳模型乘平均比例的百分比,确定最大最小值\n", " import heapq # 使用堆来找到最大和最小的值\n", " def find_min_max_within_quantile(row):\n", " true_value = row['y']\n", " row.drop(['ds','y'], inplace=True)\n", " row = row.astype(float).round(2)\n", "\n", " max_heap = []\n", " min_heap = []\n", " for col in row.index:\n", " # 对比真实值进行分类\n", " if row[col] < true_value:\n", " heapq.heappush(min_heap, row[col])\n", " elif row[col] > true_value:\n", " heapq.heappush(max_heap, -row[col]) # 使用负号来实现最大堆\n", "\n", " if len(max_heap) == 1:\n", " max_y = max_heap[0]\n", " elif len(max_heap) == 0:\n", " max_y = -min_heap[-1]\n", " else:\n", " max_y = heapq.nsmallest(2, max_heap)[1]\n", "\n", " if len(min_heap) < 2 :\n", " min_y = -max_heap[-1]\n", " else:\n", " min_y = heapq.nsmallest(2, min_heap)[-1]\n", "\n", "\n", " # 获取最大和最小的值\n", " q10 = min_y \n", " q90 = -max_y\n", "\n", " # 获取最大和最小的模型名称\n", " min_model = row[row == q10].idxmin()\n", " max_model = row[row == q90].idxmax()\n", "\n", " # 设置上下界比例\n", " rote = 1\n", "\n", " q10 = q10 * rote\n", " q90 = q90 * rote\n", "\n", " logger.info(min_model,q10,max_model,q90)\n", "\n", " return pd.Series([q10, q90, min_model, max_model], index=['min_within_quantile', 'max_within_quantile', 'min_model', 'max_model'])\n", " # # 遍历行\n", " df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n", " df_combined = df_combined.round(4)\n", " print(df_combined3)\n", "\n", "\n", " '''\n", " # # 根据真实值y确定最大最小值,去掉最高最低的预测值\n", " import heapq # 使用堆来找到最大和最小的值\n", " def find_min_max_within_quantile(row):\n", " true_value = row['y']\n", " row.drop(['ds','y'], inplace=True)\n", " row = row.astype(float).round(2)\n", "\n", " max_heap = []\n", " min_heap = []\n", " for col in row.index:\n", " # 对比真实值进行分类\n", " if row[col] < true_value:\n", " heapq.heappush(min_heap, row[col])\n", " elif row[col] > true_value:\n", " heapq.heappush(max_heap, -row[col]) # 使用负号来实现最大堆\n", "\n", " if len(max_heap) == 1:\n", " max_y = max_heap[0]\n", " elif len(max_heap) == 0:\n", " max_y = -min_heap[-1]\n", " else:\n", " max_y = heapq.nsmallest(2, max_heap)[1]\n", "\n", " if len(min_heap) < 2 :\n", " min_y = -max_heap[-1]\n", " else:\n", " min_y = heapq.nsmallest(2, min_heap)[-1]\n", "\n", "\n", " # 获取最大和最小的值\n", " q10 = min_y \n", " q90 = -max_y\n", "\n", " # 获取最大和最小的模型名称\n", " min_model = row[row == q10].idxmin()\n", " max_model = row[row == q90].idxmax()\n", "\n", " # 设置上下界比例\n", " rote = 1\n", "\n", " q10 = q10 * rote\n", " q90 = q90 * rote\n", "\n", " logger.info(min_model,q10,max_model,q90)\n", "\n", " return pd.Series([q10, q90, min_model, max_model], index=['min_within_quantile', 'max_within_quantile', 'min_model', 'max_model'])\n", " # # 遍历行\n", " df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n", " df_combined = df_combined.round(4)\n", " print(df_combined3)\n", " '''\n", "\n", " '''\n", " #使用最佳五个模型进行绘图\n", " best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()\n", " def find_min_max_within_quantile(row):\n", " row = row[best_models]\n", " q10 = row.min()\n", " q90 = row.max()\n", " # 获取 row行最大最小值模型名称\n", " min_model = row[row == q10].idxmin()\n", " max_model = row[row == q90].idxmin()\n", " \n", " # # 判断flot值是否为空值\n", " # if pd.isna(q10) or pd.isna(q90):\n", " return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])\n", "\n", " # 遍历行\n", " df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n", " df_combined = df_combined.round(4)\n", " print(df_combined3)\n", " '''\n", " \n", " '''\n", " # 通道使用模型评估前80%作为置信度\n", " def find_min_max_within_quantile(row):\n", " row.drop(['ds','y'], inplace=True)\n", " row = row.astype(float).round(2)\n", "\n", " row_sorted = row\n", " # 计算 10% 和 90% 位置的索引\n", " index_10 = 0\n", " index_90 = int(len(row_sorted) * 0.8)\n", " q10 = row_sorted[index_10]\n", " q90 = row_sorted[index_90]\n", " # 获取模型名称\n", " min_model = row[row == q10].idxmin()\n", " max_model = row[row == q90].idxmin()\n", "\n", " \n", " # # 判断flot值是否为空值\n", " # if pd.isna(q10) or pd.isna(q90):\n", " return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])\n", "\n", " # 重新排列\n", " df_combined3 = df_combined3[['ds','y'] + allmodelnames]\n", " # 遍历行\n", " df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n", " df_combined = df_combined.round(4)\n", " print(df_combined3)\n", "\n", " '''\n", " \n", " '''\n", " # 通道使用预测模型的80%置信度\n", " def find_min_max_within_quantile(row):\n", " row.drop(['ds','y'], inplace=True)\n", " row = row.astype(float).round(2)\n", "\n", " row_sorted = row.sort_values(ascending=True).reset_index(drop=True)\n", " # 计算 10% 和 90% 位置的索引\n", " index_10 = int(len(row_sorted) * 0.1)\n", " index_90 = int(len(row_sorted) * 0.9)\n", " q10 = row_sorted[index_10]\n", " q90 = row_sorted[index_90]\n", " # 获取模型名称\n", " min_model = row[row == q10].idxmin()\n", " max_model = row[row == q90].idxmin()\n", "\n", " \n", " # # 判断flot值是否为空值\n", " # if pd.isna(q10) or pd.isna(q90):\n", " return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])\n", "\n", " # 遍历行\n", " df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n", " df_combined = df_combined.round(4)\n", " print(df_combined3)\n", " '''\n", " \n", "\n", " '''\n", " # 计算波动率,取近60日波动率的10%和90%分位数确定通道上下界\n", " df_combined3['volatility'] = df_combined3['y'].pct_change().round(4)\n", " # 计算近60日的波动率 10% 90%分位数\n", " df_combined3['quantile_10'] = df_combined3['volatility'].rolling(60).quantile(0.1)\n", " df_combined3['quantile_90'] = df_combined3['volatility'].rolling(60).quantile(0.9)\n", " df_combined3 = df_combined3.round(4)\n", " # 计算分位数对应的价格\n", " df_combined3['quantile_10_price'] = df_combined3['y'] * (1 + df_combined3['quantile_10'])\n", " df_combined3['quantile_90_price'] = df_combined3['y'] * (1 + df_combined3['quantile_90'])\n", "\n", " # 遍历行\n", " def find_min_max_within_quantile(row):\n", " # 获取分位数10%和90%的值\n", " q10 = row['quantile_10_price']\n", " q90 = row['quantile_90_price']\n", " \n", " # 判断flot值是否为空值\n", " if pd.isna(q10) or pd.isna(q90):\n", " return pd.Series([None, None, None, None], index=['min_within_quantile','max_within_quantile','min_model','max_model'])\n", " \n", " # 初始化最小和最大值为None\n", " min_value = None\n", " max_value = None\n", " min_value_model = ''\n", " max_value_model = ''\n", "\n", " # 遍历指定列,找出在分位数范围内的最大最小值\n", " for model in modelnames:\n", " value = row[model]\n", " if value >= q10 and value <= q90:\n", " if min_value is None or value < min_value:\n", " min_value = value\n", " min_value_model = model\n", "\n", " if max_value is None or value > max_value:\n", " max_value = value\n", " max_value_model = model\n", " \n", " # 返回最大最小值\n", " return pd.Series([min_value, max_value,min_value_model,max_value_model], index=['min_within_quantile', 'max_within_quantile','min_model','max_model'])\n", "\n", " # 应用函数到每一行\n", " df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)\n", "\n", " \n", " '''\n", " \n", " # 去除有空值的行\n", " df_combined3.dropna(inplace=True)\n", " # 保存到数据库\n", " df_combined3.to_sql('testandpredict_groupby', sqlitedb.connection, if_exists='replace', index=False)\n", " df_combined3.to_csv(os.path.join(dataset,\"testandpredict_groupby.csv\"),index=False)\n", "\n", " '''\n", " # 去掉方差最大的模型,其余模型预测最大最小值确定通道边界\n", " \n", " \n", " # 历史数据+预测数据\n", " # 拼接未来时间预测\n", " df_predict = loadcsv(os.path.join(dataset,'predict.csv'))\n", " df_predict.drop('unique_id',inplace=True,axis=1)\n", " df_predict.dropna(axis=1,inplace=True)\n", " df_predict2 = df_predict.copy()\n", " try:\n", " df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')\n", " except ValueError :\n", " df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')\n", "\n", " # 取第一行数据存储到数据库中\n", " first_row = df_predict.head(1)\n", " first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00')\n", "\n", " # # 将预测结果保存到数据库\n", " df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)\n", " # # 判断 df 的数值列转为float\n", " for col in df_combined3.columns:\n", " try:\n", " if col != 'ds':\n", " df_combined3[col] = df_combined3[col].astype(float)\n", " df_combined3[col] = df_combined3[col].round(2)\n", " except ValueError:\n", " pass\n", " df_combined3.to_csv(os.path.join(dataset,\"testandpredict_groupby.csv\"),index=False)\n", " df_combined3['ds'] = df_combined3['ds'].dt.strftime('%Y-%m-%d 00:00:00')\n", " # # 判断表存在\n", " if not sqlitedb.check_table_exists('testandpredict_groupby'):\n", " df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)\n", " else:\n", " for row in df_combined3.itertuples(index=False):\n", " row_dict = row._asdict()\n", " check_query = sqlitedb.select_data('testandpredict_groupby',where_condition = f\"ds = '{row.ds}'\")\n", " if len(check_query) > 0:\n", " set_clause = \", \".join([f\"{key} = '{value}'\" for key, value in row_dict.items()])\n", " sqlitedb.update_data('testandpredict_groupby',set_clause,where_condition = f\"ds = '{row.ds}'\")\n", " continue\n", " sqlitedb.insert_data('testandpredict_groupby',tuple(row_dict.values()),columns=row_dict.keys())\n", "\n", " ten_models = allmodelnames\n", " # 计算每个模型的方差\n", " variances = df_combined3[ten_models].var()\n", " # 找到方差最大的模型\n", " max_variance_model = variances.idxmax()\n", " # 打印方差最大的模型\n", " print(\"方差最大的模型是:\", max_variance_model)\n", " # 去掉方差最大的模型\n", " df_combined3 = df_combined3.drop(columns=[max_variance_model])\n", " if max_variance_model in allmodelnames:\n", " allmodelnames.remove(max_variance_model)\n", " df_combined3['min'] = df_combined3[allmodelnames].min(axis=1)\n", " df_combined3['max'] = df_combined3[allmodelnames].max(axis=1)\n", " print(df_combined3[['min','max']])\n", " # 历史价格+预测价格\n", " df_combined3 = df_combined3[-50:] # 取50个数据点画图\n", " plt.figure(figsize=(20, 10))\n", " plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值',marker='o')\n", " plt.plot(df_combined3['ds'], df_combined3[most_model], label=most_model_name)\n", " plt.fill_between(df_combined3['ds'], df_combined3['min'], df_combined3['max'], alpha=0.2)\n", " plt.grid(True)\n", " # # 显示历史值\n", " for i, j in zip(df_combined3['ds'][:-5], df_combined3['y'][:-5]):\n", " plt.text(i, j, str(j), ha='center', va='bottom')\n", " # 当前日期画竖虚线\n", " plt.axvline(x=df_combined3['ds'].iloc[-horizon], color='r', linestyle='--')\n", " plt.legend()\n", " plt.xlabel('日期')\n", " plt.ylabel('价格')\n", "\n", " plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')\n", " plt.close()\n", " '''\n", "\n", " # # 历史数据+预测数据\n", " # # 拼接未来时间预测\n", " df_predict = pd.read_csv(os.path.join(dataset,'predict.csv'))\n", " df_predict.drop('unique_id',inplace=True,axis=1)\n", " df_predict.dropna(axis=1,inplace=True)\n", "\n", " try:\n", " df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')\n", " except ValueError :\n", " df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')\n", "\n", " def first_row_to_database(df):\n", " # # 取第一行数据存储到数据库中\n", " first_row = df.head(1)\n", " first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00')\n", " # 将预测结果保存到数据库\n", " if not sqlitedb.check_table_exists('trueandpredict'):\n", " first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)\n", " else:\n", " for col in first_row.columns:\n", " sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')\n", " for row in first_row.itertuples(index=False):\n", " row_dict = row._asdict()\n", " columns=row_dict.keys()\n", " check_query = sqlitedb.select_data('trueandpredict',where_condition = f\"ds = '{row.ds}'\")\n", " if len(check_query) > 0:\n", " set_clause = \", \".join([f\"{key} = '{value}'\" for key, value in row_dict.items()])\n", " sqlitedb.update_data('trueandpredict',set_clause,where_condition = f\"ds = '{row.ds}'\")\n", " continue\n", " sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=columns)\n", "\n", " first_row_to_database(df_predict)\n", " def find_most_common_model():\n", " # 最多频率的模型名称\n", " min_model_max_frequency_model = df_combined3['min_model'].tail(20).value_counts().idxmax()\n", " max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().idxmax()\n", " if min_model_max_frequency_model == max_model_max_frequency_model:\n", " # 取20天第二多的模型\n", " max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().nlargest(2).index[1]\n", "\n", " df_predict['min_model'] = min_model_max_frequency_model\n", " df_predict['max_model'] = max_model_max_frequency_model\n", " df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]\n", " df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]\n", "\n", "\n", " # find_most_common_model()\n", "\n", " df_predict2 = df_predict.copy()\n", " df_predict2['ds'] = pd.to_datetime(df_predict2['ds'])\n", " df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00')\n", "\n", "\n", " def _add_abs_error_rate():\n", " # 计算每个预测值与真实值之间的偏差率\n", " for model in allmodelnames:\n", " df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']\n", "\n", " # 获取每行对应的最小偏差率值\n", " min_abs_error_rate_values = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].min(), axis=1)\n", " # 获取每行对应的最小偏差率值对应的列名\n", " min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].idxmin(), axis=1) \n", " # 将列名索引转换为列名\n", " min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])\n", " # 获取最小偏差率对应的模型的预测值\n", " min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1)\n", " # 将最小偏差率对应的模型的预测值添加到DataFrame中\n", " df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions\n", " df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name\n", "\n", " _add_abs_error_rate()\n", "\n", " df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)\n", " # 判断 df 的数值列转为float\n", " for col in df_combined3.columns:\n", " try:\n", " if col != 'ds':\n", " df_combined3[col] = df_combined3[col].astype(float)\n", " df_combined3[col] = df_combined3[col].round(2)\n", " except ValueError:\n", " pass\n", " df_combined3.to_csv(os.path.join(dataset,\"df_combined3.csv\"),index=False) \n", " \n", " # 历史价格+预测价格\n", " # 将预测结果保存到数据库\n", " # 判断表存在\n", " # if not sqlitedb.check_table_exists('testandpredict_groupby'):\n", " # df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)\n", " # else:\n", " # for row in df_combined3.itertuples(index=False):\n", " # row_dict = row._asdict()\n", " # check_query = sqlitedb.select_data('testandpredict_groupby',where_condition = f\"ds = '{row.ds}'\")\n", " # if len(check_query) > 0:\n", " # set_clause = \", \".join([f\"{key} = '{value}'\" for key, value in row_dict.items()])\n", " # sqlitedb.update_data('testandpredict_groupby',set_clause,where_condition = f\"ds = '{row.ds}'\")\n", " # continue\n", " # sqlitedb.insert_data('testandpredict_groupby',tuple(row_dict.values()),columns=row_dict.keys())\n", " \n", " \n", " def _plt_predict_ture(df):\n", " df = df[-50:] # 取50个数据点画图\n", " # 历史价格\n", " plt.figure(figsize=(20, 10))\n", " plt.plot(df['ds'], df['y'], label='真实值')\n", " # 颜色填充\n", " plt.fill_between(df['ds'], df['min_within_quantile'], df['max_within_quantile'], alpha=0.2)\n", " # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')\n", " # 网格\n", " plt.grid(True)\n", " # 显示历史值\n", " for i, j in zip(df['ds'], df['y']):\n", " plt.text(i, j, str(j), ha='center', va='bottom')\n", "\n", " for model in most_model:\n", " plt.plot(df['ds'], df[model], label=model,marker='o')\n", " # 当前日期画竖虚线\n", " plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')\n", " plt.legend()\n", " plt.xlabel('日期')\n", " plt.ylabel('价格')\n", " \n", " plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')\n", " plt.close()\n", "\n", " def _plt_predict_table(df): \n", " # 预测值表格\n", " fig, ax = plt.subplots(figsize=(20, 6))\n", " ax.axis('off') # 关闭坐标轴\n", " # 数值保留2位小数\n", " df = df.round(2)\n", " df = df[-horizon:]\n", " df['Day'] = [f'Day_{i}' for i in range(1,horizon+1)]\n", " # Day列放到最前面\n", " df = df[['Day'] + list(df.columns[:-1])]\n", " table = ax.table(cellText=df.values, colLabels=df.columns, loc='center')\n", " #加宽表格\n", " table.auto_set_font_size(False)\n", " table.set_fontsize(10)\n", "\n", " # 设置表格样式,列数据最小的用绿色标识\n", " plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight')\n", " plt.close()\n", " \n", " def _plt_model_results3():\n", " # 可视化评估结果\n", " plt.rcParams['font.sans-serif'] = ['SimHei']\n", " fig, ax = plt.subplots(figsize=(20, 10))\n", " ax.axis('off') # 关闭坐标轴\n", " table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center')\n", " # 加宽表格\n", " table.auto_set_font_size(False)\n", " table.set_fontsize(10)\n", "\n", " # 设置表格样式,列数据最小的用绿色标识\n", " plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight')\n", " plt.close()\n", "\n", " _plt_predict_ture(df_combined3)\n", " _plt_predict_table(df_combined3)\n", " _plt_model_results3()\n", "\n", " return model_results3\n", "\n", "\n", "\n", "logger.info('训练数据绘图ing')\n", "model_results3 = model_losss_juxiting(sqlitedb)\n", "\n", "logger.info('训练数据绘图end')\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "85b557de-8235-4e27-b5b8-58b36dfe6724", "metadata": {}, "outputs": [], "source": [ "# 模型报告\n", "\n", "logger.info('制作报告ing')\n", "title = f'{settings}--{now}-预测报告' # 报告标题\n", "\n", "pp_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,\n", " reportname=reportname,sqlitedb=sqlitedb),\n", "\n", "logger.info('制作报告end')\n", "logger.info('模型训练完成')" ] }, { "cell_type": "code", "execution_count": null, "id": "d4129e71-ee2c-4af1-81ed-fadf14efa206", "metadata": {}, "outputs": [], "source": [ "# 发送邮件\n", "m = SendMail(\n", " username=username,\n", " passwd=passwd,\n", " recv=recv,\n", " title=title,\n", " content=content,\n", " file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),\n", " ssl=ssl,\n", ")\n", "# m.send_mail() \n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 5 }