PriceForecast/原油价格预测准确率计算.ipynb
2024-12-11 14:52:45 +08:00

483 lines
16 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 136,
"id": "9daadf20-caa6-4b25-901c-6cc3ef563f58",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(75, 9)\n",
"(20, 4)\n",
"(75, 12)\n",
" id PREDICT_DATE CREAT_DATE MIN_PRICE MAX_PRICE HIGH_PRICE_x LOW_PRICE_x \\\n",
"0 1 2024-11-26 2024-11-25 71.071556 76.006900 \n",
"1 2 2024-11-27 2024-11-25 71.003624 75.580560 \n",
"2 3 2024-11-28 2024-11-25 72.083850 76.204260 \n",
"3 4 2024-11-29 2024-11-25 71.329730 75.703950 \n",
"4 5 2024-12-02 2024-11-25 71.720825 76.264275 \n",
"\n",
" RIGHT_ROTE ds 序号 LOW_PRICE_y HIGH_PRICE_y \n",
"0 2024-11-26 9.0 71.63 73.80 \n",
"1 2024-11-27 8.0 71.71 72.85 \n",
"2 2024-11-28 7.0 71.85 72.96 \n",
"3 2024-11-29 6.0 71.75 73.34 \n",
"4 2024-12-02 5.0 71.52 72.89 \n",
" id PREDICT_DATE CREAT_DATE MIN_PRICE MAX_PRICE HIGH_PRICE_x \\\n",
"70 71 2024-11-25 2024-11-22 74.53063 76.673140 \n",
"71 72 2024-11-26 2024-11-22 74.44043 76.874565 \n",
"72 73 2024-11-27 2024-11-22 74.66318 76.734130 \n",
"73 74 2024-11-28 2024-11-22 74.70841 77.141050 \n",
"74 75 2024-11-29 2024-11-22 74.70321 77.746170 \n",
"\n",
" LOW_PRICE_x RIGHT_ROTE ds 序号 LOW_PRICE_y HIGH_PRICE_y \n",
"70 2024-11-25 10.0 72.30 74.83 \n",
"71 2024-11-26 9.0 71.63 73.80 \n",
"72 2024-11-27 8.0 71.71 72.85 \n",
"73 2024-11-28 7.0 71.85 72.96 \n",
"74 2024-11-29 6.0 71.75 73.34 \n"
]
}
],
"source": [
"import sqlite3\n",
"import os\n",
"import pandas as pd\n",
"\n",
"# 预测价格数据\n",
"dbfilename = os.path.join(r'D:\\code\\PriceForecast\\yuanyoudataset','jbsh_yuanyou.db')\n",
"conn = sqlite3.connect(dbfilename)\n",
"query = 'SELECT * FROM accuracy'\n",
"df1 = pd.read_sql_query(query, conn)\n",
"df1['ds'] = df1['PREDICT_DATE']\n",
"conn.close()\n",
"print(df1.shape)\n",
"\n",
"\n",
"\n",
"\n",
"# 最高最低价\n",
"xlsfilename = os.path.join(r'D:\\code\\PriceForecast\\yuanyoudataset','数据项下载.xls')\n",
"df2 = pd.read_excel(xlsfilename)[5:]\n",
"df2 = df2.rename(columns = {'数据项名称':'ds','布伦特最低价':'LOW_PRICE','布伦特最高价':'HIGH_PRICE'})\n",
"print(df2.shape)\n",
"\n",
"\n",
"\n",
"df = pd.merge(df1,df2,on=['ds'],how='left')\n",
"\n",
"df['ds'] = pd.to_datetime(df['ds'])\n",
"df['PREDICT_DATE'] = pd.to_datetime(df['PREDICT_DATE'])\n",
"df = df.reindex()\n",
"\n",
"print(df.shape)\n",
"\n",
"df.to_csv(os.path.join(r'D:\\code\\PriceForecast\\yuanyoudataset','123.csv'))\n",
"# df = df[['ds','min_within_quantile','max_within_quantile']]\n",
"\n",
"\n",
"\n",
"# 打印数据框的前几行\n",
"print(df.head())\n",
"print(df.tail())\n"
]
},
{
"cell_type": "code",
"execution_count": 137,
"id": "e51c3fd0-6bff-45de-b8b6-971e7986c7a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ds ACCURACY HIGH_PRICE_y LOW_PRICE_y MIN_PRICE MAX_PRICE\n",
"0 2024-11-26 1.000000 73.80 71.63 71.071556 76.006900\n",
"1 2024-11-27 1.000000 72.85 71.71 71.003624 75.580560\n",
"2 2024-11-28 0.789324 72.96 71.85 72.083850 76.204260\n",
"3 2024-11-29 1.000000 73.34 71.75 71.329730 75.703950\n",
"4 2024-12-02 0.853412 72.89 71.52 71.720825 76.264275\n",
".. ... ... ... ... ... ...\n",
"70 2024-11-25 0.118328 74.83 72.30 74.530630 76.673140\n",
"71 2024-11-26 0.000000 73.80 71.63 74.440430 76.874565\n",
"72 2024-11-27 0.000000 72.85 71.71 74.663180 76.734130\n",
"73 2024-11-28 0.000000 72.96 71.85 74.708410 77.141050\n",
"74 2024-11-29 0.000000 73.34 71.75 74.703210 77.746170\n",
"\n",
"[75 rows x 6 columns]\n"
]
}
],
"source": [
"# 定义一个函数来计算准确率\n",
"def calculate_accuracy(row):\n",
" if row['HIGH_PRICE_y'] > row['MIN_PRICE']:\n",
" sorted_prices = sorted([row['LOW_PRICE_y'], row['MIN_PRICE'], row['MAX_PRICE'], row['HIGH_PRICE_y']])\n",
" middle_diff = sorted_prices[2] - sorted_prices[1]\n",
" price_range = row['HIGH_PRICE_y'] - row['LOW_PRICE_y']\n",
" accuracy = middle_diff / price_range\n",
" return accuracy\n",
" else:\n",
" return 0\n",
"\n",
"# 使用 apply 函数来应用计算准确率的函数\n",
"\n",
"columns = ['HIGH_PRICE_y','LOW_PRICE_y','MIN_PRICE','MAX_PRICE']\n",
"df[columns] = df[columns].astype(float)\n",
"df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)\n",
"\n",
"\n",
"# 打印结果\n",
"print(df[['ds','ACCURACY',]+columns])\n",
"\n",
"df = df[['ds','ACCURACY','PREDICT_DATE','CREAT_DATE']+columns]"
]
},
{
"cell_type": "code",
"execution_count": 138,
"id": "0f942c69",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"# ds 按周取\n",
"df['Ds_Week'] = df['ds'].apply(lambda x: x.strftime('%U'))\n",
"df['Pre_Week'] = df['PREDICT_DATE'].apply(lambda x: x.strftime('%U'))"
]
},
{
"cell_type": "code",
"execution_count": 139,
"id": "a7b05510",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ds</th>\n",
" <th>ACCURACY</th>\n",
" <th>PREDICT_DATE</th>\n",
" <th>CREAT_DATE</th>\n",
" <th>HIGH_PRICE_y</th>\n",
" <th>LOW_PRICE_y</th>\n",
" <th>MIN_PRICE</th>\n",
" <th>MAX_PRICE</th>\n",
" <th>Ds_Week</th>\n",
" <th>Pre_Week</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2024-11-26</td>\n",
" <td>1.000000</td>\n",
" <td>2024-11-26</td>\n",
" <td>2024-11-25</td>\n",
" <td>73.80</td>\n",
" <td>71.63</td>\n",
" <td>71.071556</td>\n",
" <td>76.006900</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2024-11-27</td>\n",
" <td>1.000000</td>\n",
" <td>2024-11-27</td>\n",
" <td>2024-11-25</td>\n",
" <td>72.85</td>\n",
" <td>71.71</td>\n",
" <td>71.003624</td>\n",
" <td>75.580560</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2024-11-28</td>\n",
" <td>0.789324</td>\n",
" <td>2024-11-28</td>\n",
" <td>2024-11-25</td>\n",
" <td>72.96</td>\n",
" <td>71.85</td>\n",
" <td>72.083850</td>\n",
" <td>76.204260</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2024-11-29</td>\n",
" <td>1.000000</td>\n",
" <td>2024-11-29</td>\n",
" <td>2024-11-25</td>\n",
" <td>73.34</td>\n",
" <td>71.75</td>\n",
" <td>71.329730</td>\n",
" <td>75.703950</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2024-12-02</td>\n",
" <td>0.853412</td>\n",
" <td>2024-12-02</td>\n",
" <td>2024-11-25</td>\n",
" <td>72.89</td>\n",
" <td>71.52</td>\n",
" <td>71.720825</td>\n",
" <td>76.264275</td>\n",
" <td>48</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>2024-11-25</td>\n",
" <td>0.118328</td>\n",
" <td>2024-11-25</td>\n",
" <td>2024-11-22</td>\n",
" <td>74.83</td>\n",
" <td>72.30</td>\n",
" <td>74.530630</td>\n",
" <td>76.673140</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>2024-11-26</td>\n",
" <td>0.000000</td>\n",
" <td>2024-11-26</td>\n",
" <td>2024-11-22</td>\n",
" <td>73.80</td>\n",
" <td>71.63</td>\n",
" <td>74.440430</td>\n",
" <td>76.874565</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>2024-11-27</td>\n",
" <td>0.000000</td>\n",
" <td>2024-11-27</td>\n",
" <td>2024-11-22</td>\n",
" <td>72.85</td>\n",
" <td>71.71</td>\n",
" <td>74.663180</td>\n",
" <td>76.734130</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>2024-11-28</td>\n",
" <td>0.000000</td>\n",
" <td>2024-11-28</td>\n",
" <td>2024-11-22</td>\n",
" <td>72.96</td>\n",
" <td>71.85</td>\n",
" <td>74.708410</td>\n",
" <td>77.141050</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>2024-11-29</td>\n",
" <td>0.000000</td>\n",
" <td>2024-11-29</td>\n",
" <td>2024-11-22</td>\n",
" <td>73.34</td>\n",
" <td>71.75</td>\n",
" <td>74.703210</td>\n",
" <td>77.746170</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>75 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" ds ACCURACY PREDICT_DATE CREAT_DATE HIGH_PRICE_y LOW_PRICE_y \\\n",
"0 2024-11-26 1.000000 2024-11-26 2024-11-25 73.80 71.63 \n",
"1 2024-11-27 1.000000 2024-11-27 2024-11-25 72.85 71.71 \n",
"2 2024-11-28 0.789324 2024-11-28 2024-11-25 72.96 71.85 \n",
"3 2024-11-29 1.000000 2024-11-29 2024-11-25 73.34 71.75 \n",
"4 2024-12-02 0.853412 2024-12-02 2024-11-25 72.89 71.52 \n",
".. ... ... ... ... ... ... \n",
"70 2024-11-25 0.118328 2024-11-25 2024-11-22 74.83 72.30 \n",
"71 2024-11-26 0.000000 2024-11-26 2024-11-22 73.80 71.63 \n",
"72 2024-11-27 0.000000 2024-11-27 2024-11-22 72.85 71.71 \n",
"73 2024-11-28 0.000000 2024-11-28 2024-11-22 72.96 71.85 \n",
"74 2024-11-29 0.000000 2024-11-29 2024-11-22 73.34 71.75 \n",
"\n",
" MIN_PRICE MAX_PRICE Ds_Week Pre_Week \n",
"0 71.071556 76.006900 47 47 \n",
"1 71.003624 75.580560 47 47 \n",
"2 72.083850 76.204260 47 47 \n",
"3 71.329730 75.703950 47 47 \n",
"4 71.720825 76.264275 48 48 \n",
".. ... ... ... ... \n",
"70 74.530630 76.673140 47 47 \n",
"71 74.440430 76.874565 47 47 \n",
"72 74.663180 76.734130 47 47 \n",
"73 74.708410 77.141050 47 47 \n",
"74 74.703210 77.746170 47 47 \n",
"\n",
"[75 rows x 10 columns]"
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 140,
"id": "1374e354",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['2024-11-22', '2024-11-23', '2024-11-24', '2024-11-25', '2024-11-26', '2024-11-27', '2024-11-28', '2024-11-29']\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(15, 10)\n"
]
}
],
"source": [
"# 取结束日期上一周的日期\n",
"endtime = '2024-12-3'\n",
"endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d')\n",
"up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14)\n",
"up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)][4:-2]\n",
"up_week_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates]\n",
"print(up_week_dates)\n",
"\n",
"\n",
"df3 = df.copy()\n",
"df3 = df3[df3['CREAT_DATE'].isin(up_week_dates)]\n",
"df3 = df3[df3['PREDICT_DATE'].isin(up_week_dates)]\n",
"print(df3.shape)\n",
"df3.to_csv('up_week_dates.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 141,
"id": "8aa47e90",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-11-25 00:00:00\n",
"权重: 0.07\n",
"准确率: 1.7749209486165771\n",
"2024-11-26 00:00:00\n",
"权重: 0.13\n",
"准确率: 7.5\n",
"2024-11-27 00:00:00\n",
"权重: 0.2\n",
"准确率: 8.034364035087705\n",
"2024-11-28 00:00:00\n",
"权重: 0.27\n",
"准确率: 9.718006756756724\n",
"2024-11-29 00:00:00\n",
"权重: 0.33\n",
"准确率: 10.824716981132076\n",
"37.85200872159308\n"
]
}
],
"source": [
"total = len(df3)\n",
"accuracy_rote = 0\n",
"# for i,group in df3.groupby('CREAT_DATE'):\n",
"for i,group in df3.groupby('ds'):\n",
" print(i)\n",
" print('权重:',round(len(group)/total,2))\n",
" print('准确率:',group['ACCURACY'].sum()/(len(group)/total))\n",
" accuracy_rote += group['ACCURACY'].sum()/(len(group)/total)\n",
"\n",
"print(accuracy_rote)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}