483 lines
16 KiB
Plaintext
483 lines
16 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 136,
|
||
"id": "9daadf20-caa6-4b25-901c-6cc3ef563f58",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"(75, 9)\n",
|
||
"(20, 4)\n",
|
||
"(75, 12)\n",
|
||
" id PREDICT_DATE CREAT_DATE MIN_PRICE MAX_PRICE HIGH_PRICE_x LOW_PRICE_x \\\n",
|
||
"0 1 2024-11-26 2024-11-25 71.071556 76.006900 \n",
|
||
"1 2 2024-11-27 2024-11-25 71.003624 75.580560 \n",
|
||
"2 3 2024-11-28 2024-11-25 72.083850 76.204260 \n",
|
||
"3 4 2024-11-29 2024-11-25 71.329730 75.703950 \n",
|
||
"4 5 2024-12-02 2024-11-25 71.720825 76.264275 \n",
|
||
"\n",
|
||
" RIGHT_ROTE ds 序号 LOW_PRICE_y HIGH_PRICE_y \n",
|
||
"0 2024-11-26 9.0 71.63 73.80 \n",
|
||
"1 2024-11-27 8.0 71.71 72.85 \n",
|
||
"2 2024-11-28 7.0 71.85 72.96 \n",
|
||
"3 2024-11-29 6.0 71.75 73.34 \n",
|
||
"4 2024-12-02 5.0 71.52 72.89 \n",
|
||
" id PREDICT_DATE CREAT_DATE MIN_PRICE MAX_PRICE HIGH_PRICE_x \\\n",
|
||
"70 71 2024-11-25 2024-11-22 74.53063 76.673140 \n",
|
||
"71 72 2024-11-26 2024-11-22 74.44043 76.874565 \n",
|
||
"72 73 2024-11-27 2024-11-22 74.66318 76.734130 \n",
|
||
"73 74 2024-11-28 2024-11-22 74.70841 77.141050 \n",
|
||
"74 75 2024-11-29 2024-11-22 74.70321 77.746170 \n",
|
||
"\n",
|
||
" LOW_PRICE_x RIGHT_ROTE ds 序号 LOW_PRICE_y HIGH_PRICE_y \n",
|
||
"70 2024-11-25 10.0 72.30 74.83 \n",
|
||
"71 2024-11-26 9.0 71.63 73.80 \n",
|
||
"72 2024-11-27 8.0 71.71 72.85 \n",
|
||
"73 2024-11-28 7.0 71.85 72.96 \n",
|
||
"74 2024-11-29 6.0 71.75 73.34 \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"import sqlite3\n",
"import os\n",
"import pandas as pd\n",
"\n",
"# Folder that holds the forecast database and the downloaded price workbook.\n",
"DATA_DIR = r'D:\\code\\PriceForecast\\yuanyoudataset'\n",
"\n",
"# Forecast price data: read the whole `accuracy` table from SQLite.\n",
"dbfilename = os.path.join(DATA_DIR, 'jbsh_yuanyou.db')\n",
"conn = sqlite3.connect(dbfilename)\n",
"try:\n",
"    query = 'SELECT * FROM accuracy'\n",
"    df1 = pd.read_sql_query(query, conn)\n",
"finally:\n",
"    # Close the connection even when the query raises.\n",
"    conn.close()\n",
"# `ds` duplicates PREDICT_DATE and serves as the merge key below.\n",
"df1['ds'] = df1['PREDICT_DATE']\n",
"print(df1.shape)\n",
"\n",
"# Observed highest / lowest prices; the [5:] slice drops the first five rows read from the sheet.\n",
"xlsfilename = os.path.join(DATA_DIR, '数据项下载.xls')\n",
"df2 = pd.read_excel(xlsfilename)[5:]\n",
"df2 = df2.rename(columns={'数据项名称': 'ds', '布伦特最低价': 'LOW_PRICE', '布伦特最高价': 'HIGH_PRICE'})\n",
"print(df2.shape)\n",
"\n",
"# Left join keeps every forecast row; column names present on both sides get _x / _y suffixes.\n",
"# NOTE(review): the join compares `ds` values as loaded, before the datetime conversion below - confirm both sides use the same text format.\n",
"df = pd.merge(df1, df2, on=['ds'], how='left')\n",
"\n",
"df['ds'] = pd.to_datetime(df['ds'])\n",
"df['PREDICT_DATE'] = pd.to_datetime(df['PREDICT_DATE'])\n",
"# Renumber rows 0..n-1 (the previous bare reindex() call changed nothing).\n",
"df = df.reset_index(drop=True)\n",
"\n",
"print(df.shape)\n",
"\n",
"df.to_csv(os.path.join(DATA_DIR, '123.csv'))\n",
"\n",
"# Show the first and last rows of the merged frame.\n",
"print(df.head())\n",
"print(df.tail())\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 137,
|
||
"id": "e51c3fd0-6bff-45de-b8b6-971e7986c7a7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" ds ACCURACY HIGH_PRICE_y LOW_PRICE_y MIN_PRICE MAX_PRICE\n",
|
||
"0 2024-11-26 1.000000 73.80 71.63 71.071556 76.006900\n",
|
||
"1 2024-11-27 1.000000 72.85 71.71 71.003624 75.580560\n",
|
||
"2 2024-11-28 0.789324 72.96 71.85 72.083850 76.204260\n",
|
||
"3 2024-11-29 1.000000 73.34 71.75 71.329730 75.703950\n",
|
||
"4 2024-12-02 0.853412 72.89 71.52 71.720825 76.264275\n",
|
||
".. ... ... ... ... ... ...\n",
|
||
"70 2024-11-25 0.118328 74.83 72.30 74.530630 76.673140\n",
|
||
"71 2024-11-26 0.000000 73.80 71.63 74.440430 76.874565\n",
|
||
"72 2024-11-27 0.000000 72.85 71.71 74.663180 76.734130\n",
|
||
"73 2024-11-28 0.000000 72.96 71.85 74.708410 77.141050\n",
|
||
"74 2024-11-29 0.000000 73.34 71.75 74.703210 77.746170\n",
|
||
"\n",
|
||
"[75 rows x 6 columns]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Row-wise accuracy function\n",
"def calculate_accuracy(row):\n",
"    \"\"\"Return the share of the observed price range covered by the predicted range.\n",
"\n",
"    Args:\n",
"        row: Mapping (for example a DataFrame row) with numeric 'LOW_PRICE_y',\n",
"            'HIGH_PRICE_y', 'MIN_PRICE' and 'MAX_PRICE' entries.\n",
"\n",
"    Returns:\n",
"        Length of the overlap between [MIN_PRICE, MAX_PRICE] and\n",
"        [LOW_PRICE_y, HIGH_PRICE_y] divided by HIGH_PRICE_y - LOW_PRICE_y.\n",
"        0 when any value is NaN, when the observed range has no width, or\n",
"        when the two ranges do not overlap.\n",
"    \"\"\"\n",
"    actual_low, actual_high = row['LOW_PRICE_y'], row['HIGH_PRICE_y']\n",
"    pred_low, pred_high = row['MIN_PRICE'], row['MAX_PRICE']\n",
"    # NaN is the only value that differs from itself; rows with missing prices score 0.\n",
"    if any(value != value for value in (actual_low, actual_high, pred_low, pred_high)):\n",
"        return 0\n",
"    price_range = actual_high - actual_low\n",
"    # Interval intersection: not positive when the prediction lies entirely above OR below the observed range.\n",
"    overlap = min(actual_high, pred_high) - max(actual_low, pred_low)\n",
"    if price_range <= 0 or overlap <= 0:\n",
"        return 0\n",
"    return overlap / price_range\n",
|
||
"\n",
|
"# Cast the four price columns to float, then score every row with calculate_accuracy.\n",
"columns = ['HIGH_PRICE_y', 'LOW_PRICE_y', 'MIN_PRICE', 'MAX_PRICE']\n",
"df[columns] = df[columns].astype(float)\n",
"df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)\n",
"\n",
"# Print the scores next to the prices they were computed from.\n",
"preview_columns = ['ds', 'ACCURACY'] + columns\n",
"print(df[preview_columns])\n",
"\n",
"# Keep only the columns the following cells read.\n",
"kept_columns = ['ds', 'ACCURACY', 'PREDICT_DATE', 'CREAT_DATE'] + columns\n",
"df = df[kept_columns]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 138,
|
||
"id": "0f942c69",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import datetime\n",
|
"# Week-of-year label for each date column ('%U': zero-padded week number with Sunday as the first day of the week).\n",
"for source_col, week_col in (('ds', 'Ds_Week'), ('PREDICT_DATE', 'Pre_Week')):\n",
"    df[week_col] = df[source_col].apply(lambda stamp: stamp.strftime('%U'))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 139,
|
||
"id": "a7b05510",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>ds</th>\n",
|
||
" <th>ACCURACY</th>\n",
|
||
" <th>PREDICT_DATE</th>\n",
|
||
" <th>CREAT_DATE</th>\n",
|
||
" <th>HIGH_PRICE_y</th>\n",
|
||
" <th>LOW_PRICE_y</th>\n",
|
||
" <th>MIN_PRICE</th>\n",
|
||
" <th>MAX_PRICE</th>\n",
|
||
" <th>Ds_Week</th>\n",
|
||
" <th>Pre_Week</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2024-11-26</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>2024-11-26</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>73.80</td>\n",
|
||
" <td>71.63</td>\n",
|
||
" <td>71.071556</td>\n",
|
||
" <td>76.006900</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2024-11-27</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>2024-11-27</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>72.85</td>\n",
|
||
" <td>71.71</td>\n",
|
||
" <td>71.003624</td>\n",
|
||
" <td>75.580560</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2024-11-28</td>\n",
|
||
" <td>0.789324</td>\n",
|
||
" <td>2024-11-28</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>72.96</td>\n",
|
||
" <td>71.85</td>\n",
|
||
" <td>72.083850</td>\n",
|
||
" <td>76.204260</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2024-11-29</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>2024-11-29</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>73.34</td>\n",
|
||
" <td>71.75</td>\n",
|
||
" <td>71.329730</td>\n",
|
||
" <td>75.703950</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2024-12-02</td>\n",
|
||
" <td>0.853412</td>\n",
|
||
" <td>2024-12-02</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>72.89</td>\n",
|
||
" <td>71.52</td>\n",
|
||
" <td>71.720825</td>\n",
|
||
" <td>76.264275</td>\n",
|
||
" <td>48</td>\n",
|
||
" <td>48</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>70</th>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>0.118328</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>2024-11-22</td>\n",
|
||
" <td>74.83</td>\n",
|
||
" <td>72.30</td>\n",
|
||
" <td>74.530630</td>\n",
|
||
" <td>76.673140</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>71</th>\n",
|
||
" <td>2024-11-26</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2024-11-26</td>\n",
|
||
" <td>2024-11-22</td>\n",
|
||
" <td>73.80</td>\n",
|
||
" <td>71.63</td>\n",
|
||
" <td>74.440430</td>\n",
|
||
" <td>76.874565</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>72</th>\n",
|
||
" <td>2024-11-27</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2024-11-27</td>\n",
|
||
" <td>2024-11-22</td>\n",
|
||
" <td>72.85</td>\n",
|
||
" <td>71.71</td>\n",
|
||
" <td>74.663180</td>\n",
|
||
" <td>76.734130</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>73</th>\n",
|
||
" <td>2024-11-28</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2024-11-28</td>\n",
|
||
" <td>2024-11-22</td>\n",
|
||
" <td>72.96</td>\n",
|
||
" <td>71.85</td>\n",
|
||
" <td>74.708410</td>\n",
|
||
" <td>77.141050</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>74</th>\n",
|
||
" <td>2024-11-29</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2024-11-29</td>\n",
|
||
" <td>2024-11-22</td>\n",
|
||
" <td>73.34</td>\n",
|
||
" <td>71.75</td>\n",
|
||
" <td>74.703210</td>\n",
|
||
" <td>77.746170</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>75 rows × 10 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" ds ACCURACY PREDICT_DATE CREAT_DATE HIGH_PRICE_y LOW_PRICE_y \\\n",
|
||
"0 2024-11-26 1.000000 2024-11-26 2024-11-25 73.80 71.63 \n",
|
||
"1 2024-11-27 1.000000 2024-11-27 2024-11-25 72.85 71.71 \n",
|
||
"2 2024-11-28 0.789324 2024-11-28 2024-11-25 72.96 71.85 \n",
|
||
"3 2024-11-29 1.000000 2024-11-29 2024-11-25 73.34 71.75 \n",
|
||
"4 2024-12-02 0.853412 2024-12-02 2024-11-25 72.89 71.52 \n",
|
||
".. ... ... ... ... ... ... \n",
|
||
"70 2024-11-25 0.118328 2024-11-25 2024-11-22 74.83 72.30 \n",
|
||
"71 2024-11-26 0.000000 2024-11-26 2024-11-22 73.80 71.63 \n",
|
||
"72 2024-11-27 0.000000 2024-11-27 2024-11-22 72.85 71.71 \n",
|
||
"73 2024-11-28 0.000000 2024-11-28 2024-11-22 72.96 71.85 \n",
|
||
"74 2024-11-29 0.000000 2024-11-29 2024-11-22 73.34 71.75 \n",
|
||
"\n",
|
||
" MIN_PRICE MAX_PRICE Ds_Week Pre_Week \n",
|
||
"0 71.071556 76.006900 47 47 \n",
|
||
"1 71.003624 75.580560 47 47 \n",
|
||
"2 72.083850 76.204260 47 47 \n",
|
||
"3 71.329730 75.703950 47 47 \n",
|
||
"4 71.720825 76.264275 48 48 \n",
|
||
".. ... ... ... ... \n",
|
||
"70 74.530630 76.673140 47 47 \n",
|
||
"71 74.440430 76.874565 47 47 \n",
|
||
"72 74.663180 76.734130 47 47 \n",
|
||
"73 74.708410 77.141050 47 47 \n",
|
||
"74 74.703210 77.746170 47 47 \n",
|
||
"\n",
|
||
"[75 rows x 10 columns]"
|
||
]
|
||
},
|
||
"execution_count": 139,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 140,
|
||
"id": "1374e354",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"['2024-11-22', '2024-11-23', '2024-11-24', '2024-11-25', '2024-11-26', '2024-11-27', '2024-11-28', '2024-11-29']\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"(15, 10)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Select the dates of the week before the end date.\n",
"endtime = '2024-12-3'\n",
"endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d')\n",
"# Monday two weeks before the week that contains `endtime` (weekday() is 0 on Monday).\n",
"up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14)\n",
"# Day offsets 4..11 from that Monday: its Friday through the Friday of the following week.\n",
"up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(4, 12)]\n",
"up_week_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates]\n",
"print(up_week_dates)\n",
"\n",
"# PREDICT_DATE was converted to datetime64 earlier, so match it against real timestamps\n",
"# instead of relying on isin() casting strings (deprecated in pandas 2.2).\n",
"# CREAT_DATE is still matched as text - assumes it is stored as 'YYYY-MM-DD'; confirm.\n",
"up_week_timestamps = pd.to_datetime(up_week_dates)\n",
"in_window = df['CREAT_DATE'].isin(up_week_dates) & df['PREDICT_DATE'].isin(up_week_timestamps)\n",
"df3 = df[in_window].copy()\n",
"print(df3.shape)\n",
"df3.to_csv('up_week_dates.csv', index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 141,
|
||
"id": "8aa47e90",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-11-25 00:00:00\n",
|
||
"权重: 0.07\n",
|
||
"准确率: 1.7749209486165771\n",
|
||
"2024-11-26 00:00:00\n",
|
||
"权重: 0.13\n",
|
||
"准确率: 7.5\n",
|
||
"2024-11-27 00:00:00\n",
|
||
"权重: 0.2\n",
|
||
"准确率: 8.034364035087705\n",
|
||
"2024-11-28 00:00:00\n",
|
||
"权重: 0.27\n",
|
||
"准确率: 9.718006756756724\n",
|
||
"2024-11-29 00:00:00\n",
|
||
"权重: 0.33\n",
|
||
"准确率: 10.824716981132076\n",
|
||
"37.85200872159308\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"total = len(df3)\n",
"accuracy_rote = 0\n",
"# Group by forecast target date (`ds`); grouping by 'CREAT_DATE' is the alternative tried before.\n",
"for i, group in df3.groupby('ds'):\n",
"    # Weight of a date = its share of the rows in the window.\n",
"    weight = len(group) / total\n",
"    # Mean accuracy of the date scaled by its weight. Dividing the sum by the weight, as before,\n",
"    # inflated small groups and gave totals far above 1.\n",
"    # With share-of-rows weights the running total equals the overall mean of ACCURACY.\n",
"    weighted_accuracy = group['ACCURACY'].mean() * weight\n",
"    print(i)\n",
"    print('权重:', round(weight, 2))\n",
"    print('准确率:', weighted_accuracy)\n",
"    accuracy_rote += weighted_accuracy\n",
"\n",
"print(accuracy_rote)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.7"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|