# Load the predicted price bands (SQLite) and the actual Brent high/low prices
# (Excel), join them on the prediction date and export the merged table to CSV.
import sqlite3
import os
from contextlib import closing

import pandas as pd

# Single place to change the folder that holds the database, the Excel download
# and the CSV export (previously repeated as a literal three times).
DATA_DIR = r'D:\code\PriceForecast\yuanyoudataset'

# --- Predicted price data -------------------------------------------------
dbfilename = os.path.join(DATA_DIR, 'jbsh_yuanyou.db')
query = 'SELECT * FROM accuracy'
# closing() releases the connection even when the query raises; a sqlite3
# connection used directly as a context manager only manages the transaction.
with closing(sqlite3.connect(dbfilename)) as conn:
    df1 = pd.read_sql_query(query, conn)
# `ds` is a copy of PREDICT_DATE and serves as the join key below.
df1['ds'] = df1['PREDICT_DATE']
print(df1.shape)

# --- Actual daily low / high prices ----------------------------------------
xlsfilename = os.path.join(DATA_DIR, '数据项下载.xls')
# The first five rows of the sheet are skipped before renaming the columns.
df2 = pd.read_excel(xlsfilename)[5:]
df2 = df2.rename(
    columns={'数据项名称': 'ds', '布伦特最低价': 'LOW_PRICE', '布伦特最高价': 'HIGH_PRICE'}
)
print(df2.shape)

# --- Merge -------------------------------------------------------------------
# Left join keeps every prediction row. Both inputs carry HIGH_PRICE / LOW_PRICE
# columns, so pandas suffixes them `_x` (database) and `_y` (Excel).
df = pd.merge(df1, df2, on=['ds'], how='left')

df['ds'] = pd.to_datetime(df['ds'])
df['PREDICT_DATE'] = pd.to_datetime(df['PREDICT_DATE'])
# The original called df.reindex() with no arguments, which changes nothing;
# reset_index(drop=True) states the intent (a clean 0..n-1 index) explicitly.
df = df.reset_index(drop=True)

print(df.shape)

df.to_csv(os.path.join(DATA_DIR, '123.csv'))

# Show the first and last rows of the merged frame.
print(df.head())
print(df.tail())
import datetime


def calculate_accuracy(row):
    """Return the share of the actual price range covered by the predicted band.

    Parameters
    ----------
    row : mapping (e.g. one DataFrame row)
        Must provide ``'LOW_PRICE_y'`` / ``'HIGH_PRICE_y'`` (actual low / high)
        and ``'MIN_PRICE'`` / ``'MAX_PRICE'`` (predicted lower / upper bound).

    Returns
    -------
    float
        ``overlap / (HIGH_PRICE_y - LOW_PRICE_y)`` where ``overlap`` is the
        length of the intersection of the actual and predicted intervals.
        ``0.0`` when the intervals do not overlap or when any input is NaN;
        ``1.0`` when the actual range is a single price inside the band.
    """
    low, high = row['LOW_PRICE_y'], row['HIGH_PRICE_y']
    pred_min, pred_max = row['MIN_PRICE'], row['MAX_PRICE']

    # NaN is the only value that differs from itself. Rows without an actual
    # price (left-join misses) score 0, as they did before.
    if any(value != value for value in (low, high, pred_min, pred_max)):
        return 0.0

    # No overlap: the band lies entirely above OR entirely below the actual
    # range. The original only tested the first case, so a band below the
    # actual range was scored with the size of the gap between the intervals.
    if high <= pred_min or pred_max <= low:
        return 0.0

    price_range = high - low
    if price_range == 0:
        # A single actual price; the guards above guarantee it is inside the
        # band. The original raised ZeroDivisionError here.
        return 1.0

    overlap = min(high, pred_max) - max(low, pred_min)
    return overlap / price_range


# Price columns used by the accuracy calculation and kept in the final frame.
columns = ['HIGH_PRICE_y', 'LOW_PRICE_y', 'MIN_PRICE', 'MAX_PRICE']

# Jupyter kernels execute cells with __name__ == "__main__", so this guard is
# transparent inside the notebook while allowing calculate_accuracy to be
# imported without the notebook-level `df`.
if __name__ == "__main__":
    # Apply the accuracy function to every row.
    df[columns] = df[columns].astype(float)
    df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)

    # Print the result.
    print(df[['ds', 'ACCURACY'] + columns])

    # .copy() makes the selection an independent frame so the column
    # assignments below cannot trigger SettingWithCopyWarning.
    df = df[['ds', 'ACCURACY', 'PREDICT_DATE', 'CREAT_DATE'] + columns].copy()

    # Week-of-year labels ('%U': weeks start on Sunday, zero-padded string).
    # NOTE(review): `ds` was created as a copy of PREDICT_DATE, so both columns
    # hold the same values - confirm whether one should use CREAT_DATE instead.
    df['Ds_Week'] = df['ds'].dt.strftime('%U')
    df['Pre_Week'] = df['PREDICT_DATE'].dt.strftime('%U')
\n", " | ds | \n", "ACCURACY | \n", "PREDICT_DATE | \n", "CREAT_DATE | \n", "HIGH_PRICE_y | \n", "LOW_PRICE_y | \n", "MIN_PRICE | \n", "MAX_PRICE | \n", "Ds_Week | \n", "Pre_Week | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2024-11-26 | \n", "1.000000 | \n", "2024-11-26 | \n", "2024-11-25 | \n", "73.80 | \n", "71.63 | \n", "71.071556 | \n", "76.006900 | \n", "47 | \n", "47 | \n", "
1 | \n", "2024-11-27 | \n", "1.000000 | \n", "2024-11-27 | \n", "2024-11-25 | \n", "72.85 | \n", "71.71 | \n", "71.003624 | \n", "75.580560 | \n", "47 | \n", "47 | \n", "
2 | \n", "2024-11-28 | \n", "0.789324 | \n", "2024-11-28 | \n", "2024-11-25 | \n", "72.96 | \n", "71.85 | \n", "72.083850 | \n", "76.204260 | \n", "47 | \n", "47 | \n", "
3 | \n", "2024-11-29 | \n", "1.000000 | \n", "2024-11-29 | \n", "2024-11-25 | \n", "73.34 | \n", "71.75 | \n", "71.329730 | \n", "75.703950 | \n", "47 | \n", "47 | \n", "
4 | \n", "2024-12-02 | \n", "0.853412 | \n", "2024-12-02 | \n", "2024-11-25 | \n", "72.89 | \n", "71.52 | \n", "71.720825 | \n", "76.264275 | \n", "48 | \n", "48 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
70 | \n", "2024-11-25 | \n", "0.118328 | \n", "2024-11-25 | \n", "2024-11-22 | \n", "74.83 | \n", "72.30 | \n", "74.530630 | \n", "76.673140 | \n", "47 | \n", "47 | \n", "
71 | \n", "2024-11-26 | \n", "0.000000 | \n", "2024-11-26 | \n", "2024-11-22 | \n", "73.80 | \n", "71.63 | \n", "74.440430 | \n", "76.874565 | \n", "47 | \n", "47 | \n", "
72 | \n", "2024-11-27 | \n", "0.000000 | \n", "2024-11-27 | \n", "2024-11-22 | \n", "72.85 | \n", "71.71 | \n", "74.663180 | \n", "76.734130 | \n", "47 | \n", "47 | \n", "
73 | \n", "2024-11-28 | \n", "0.000000 | \n", "2024-11-28 | \n", "2024-11-22 | \n", "72.96 | \n", "71.85 | \n", "74.708410 | \n", "77.141050 | \n", "47 | \n", "47 | \n", "
74 | \n", "2024-11-29 | \n", "0.000000 | \n", "2024-11-29 | \n", "2024-11-22 | \n", "73.34 | \n", "71.75 | \n", "74.703210 | \n", "77.746170 | \n", "47 | \n", "47 | \n", "
75 rows × 10 columns
\n", "