PriceForecast/原油价格预测准确率计算.ipynb

681 lines
31 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"id": "9daadf20-caa6-4b25-901c-6cc3ef563f58",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(255, 28)\n",
"(78, 4)\n",
"(255, 31)\n",
" ds NHITS Informer LSTM iTransformer TSMixer \\\n",
"0 2024-10-08 79.76823 80.197660 79.802414 78.391460 80.200510 \n",
"1 2024-10-09 78.75903 80.235740 79.844154 78.888565 79.861180 \n",
"2 2024-10-10 79.43970 80.186230 79.885100 79.488700 79.483086 \n",
"3 2024-10-11 79.62268 80.502975 79.878560 79.406670 79.313965 \n",
"4 2024-10-14 79.91698 80.931946 79.936270 79.758575 79.197430 \n",
"\n",
" TSMixerx PatchTST RNN GRU ... y \\\n",
"0 79.243256 80.883450 80.836105 81.276060 ... 77.180000 \n",
"1 78.068150 80.950096 80.917860 81.303505 ... 76.580002 \n",
"2 77.189064 80.347400 80.866040 81.798050 ... 79.400002 \n",
"3 77.840096 80.545296 81.167710 81.552810 ... 79.040001 \n",
"4 77.904300 81.432976 81.144210 81.483215 ... 77.459999 \n",
"\n",
" min_within_quantile max_within_quantile id CREAT_DATE min_price \\\n",
"0 80.200510 81.163630 51 2024-10-07 79.848624 \n",
"1 79.861180 81.757850 52 2024-10-07 79.981211 \n",
"2 79.483086 81.190400 53 2024-10-07 79.398409 \n",
"3 79.313965 81.371100 54 2024-10-07 79.394607 \n",
"4 79.197430 81.432976 55 2024-10-07 79.351007 \n",
"\n",
" max_price 序号 LOW_PRICE HIGH_PRICE \n",
"0 81.848624 52.0 76.36 81.14 \n",
"1 81.981211 51.0 75.15 78.02 \n",
"2 81.398409 50.0 76.72 79.72 \n",
"3 81.394607 49.0 78.04 79.50 \n",
"4 81.351007 48.0 74.86 78.55 \n",
"\n",
"[5 rows x 31 columns]\n",
" ds NHITS Informer LSTM iTransformer TSMixer \\\n",
"250 2024-12-16 74.268654 73.333750 73.090164 74.007034 74.36094 \n",
"251 2024-12-17 74.724630 73.373810 73.242540 74.285530 74.24597 \n",
"252 2024-12-18 74.948860 73.505330 73.400400 74.260290 74.06419 \n",
"253 2024-12-19 74.396740 73.934380 73.764320 74.429800 74.18059 \n",
"254 2024-12-20 73.882930 73.700935 73.769050 73.977585 73.97370 \n",
"\n",
" TSMixerx PatchTST RNN GRU ... y min_within_quantile \\\n",
"250 74.67781 74.475680 75.63023 74.853800 ... NaN 74.157196 \n",
"251 74.46460 74.628000 75.22519 74.957530 ... NaN 73.711680 \n",
"252 74.32628 74.656815 75.49716 74.890236 ... NaN 74.064190 \n",
"253 74.41026 74.698875 75.87007 75.118866 ... NaN 74.148070 \n",
"254 74.49235 74.345410 75.88466 75.186325 ... NaN 73.816990 \n",
"\n",
" max_within_quantile id CREAT_DATE min_price max_price 序号 \\\n",
"250 74.576454 301 2024-12-16 73.416857 75.416857 3.0 \n",
"251 74.948060 302 2024-12-16 73.434301 75.434301 2.0 \n",
"252 75.200980 303 2024-12-16 73.707471 75.707471 1.0 \n",
"253 75.395440 304 2024-12-16 73.639791 75.639791 NaN \n",
"254 74.345410 305 2024-12-16 73.067399 75.067399 NaN \n",
"\n",
" LOW_PRICE HIGH_PRICE \n",
"250 72.53 73.28 \n",
"251 72.48 74.18 \n",
"252 72.80 74.20 \n",
"253 NaN NaN \n",
"254 NaN NaN \n",
"\n",
"[5 rows x 31 columns]\n"
]
}
],
"source": [
"import sqlite3\n",
"import os\n",
"import pandas as pd\n",
"\n",
"dataset = r'yuanyoudataset'\n",
"\n",
"# dataset = r'C:\\Users\\Administrator\\Desktop' \n",
"\n",
"# 预测价格数据\n",
"# dbfilename = os.path.join(r'D:\\code\\PriceForecast\\yuanyoudataset','jbsh_yuanyou.db')\n",
"# conn = sqlite3.connect(dbfilename)\n",
"# query = 'SELECT * FROM accuracy'\n",
"# df1 = pd.read_sql_query(query, conn)\n",
"# df1['ds'] = df1['PREDICT_DATE']\n",
"# conn.close()\n",
"# print(df1.shape)\n",
"\n",
"# 预测价格数据\n",
"dfcsvfilename = os.path.join(dataset,'accuracy_five_mean.csv')\n",
"df1 = pd.read_csv(dfcsvfilename)\n",
"print(df1.shape)\n",
"\n",
"# 最高最低价\n",
"xlsfilename = os.path.join(dataset,'数据项下载.xls')\n",
"df2 = pd.read_excel(xlsfilename)[5:]\n",
"df2 = df2.rename(columns = {'数据项名称':'ds','布伦特最低价':'LOW_PRICE','布伦特最高价':'HIGH_PRICE'})\n",
"print(df2.shape)\n",
"\n",
"\n",
"\n",
"df = pd.merge(df1,df2,on=['ds'],how='left')\n",
"\n",
"df['ds'] = pd.to_datetime(df['ds'])\n",
"# df['PREDICT_DATE'] = pd.to_datetime(df['PREDICT_DATE'])\n",
"df = df.reindex()\n",
"\n",
"print(df.shape)\n",
"# from datetime import datetime\n",
"import time\n",
"df.to_csv(os.path.join(dataset,f'预测数据-{time.time()}.csv'))\n",
"# df = df[['ds','min_within_quantile','max_within_quantile']]\n",
"\n",
"\n",
"\n",
"# 打印数据框的前几行\n",
"print(df.head())\n",
"print(df.tail())\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "0d77ab7d",
"metadata": {},
"outputs": [],
"source": [
"# 模型评估前五均值 \n",
"df['min_price'] = df.iloc[:,1:6].mean(axis=1) -1.5\n",
"df['max_price'] = df.iloc[:,1:6].mean(axis=1) +1.5"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e51c3fd0-6bff-45de-b8b6-971e7986c7a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 开始日期 结束日期 准确率\n",
"0 2024-09-27 2024-10-04 0\n",
" 开始日期 结束日期 准确率\n",
"0 2024-09-27 2024-10-04 0\n",
" 开始日期 结束日期 准确率\n",
"0 2024-09-27 2024-10-04 0\n",
" 开始日期 结束日期 准确率\n",
"0 2024-09-27 2024-10-04 0\n",
" 开始日期 结束日期 准确率\n",
"0 2024-09-27 2024-10-04 0\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-04 2024-10-11 0.495046\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-04 2024-10-11 0.495046\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-04 2024-10-11 0.495046\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-04 2024-10-11 0.495046\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-04 2024-10-11 0.495046\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-11 2024-10-18 0.449368\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-11 2024-10-18 0.449368\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-11 2024-10-18 0.449368\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-11 2024-10-18 0.449368\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-11 2024-10-18 0.449368\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-18 2024-10-25 0.814057\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-18 2024-10-25 0.814057\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-18 2024-10-25 0.814057\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-18 2024-10-25 0.814057\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-18 2024-10-25 0.814057\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-25 2024-11-01 0.433599\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-25 2024-11-01 0.433599\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-25 2024-11-01 0.433599\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-25 2024-11-01 0.433599\n",
" 开始日期 结束日期 准确率\n",
"0 2024-10-25 2024-11-01 0.433599\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-01 2024-11-08 0.894767\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-01 2024-11-08 0.894767\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-01 2024-11-08 0.894767\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-01 2024-11-08 0.894767\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-01 2024-11-08 0.894767\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-08 2024-11-15 0.915721\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-08 2024-11-15 0.915721\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-08 2024-11-15 0.915721\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-08 2024-11-15 0.915721\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-08 2024-11-15 0.915721\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-15 2024-11-22 0.835755\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-15 2024-11-22 0.835755\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-15 2024-11-22 0.835755\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-15 2024-11-22 0.835755\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-15 2024-11-22 0.835755\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-22 2024-11-29 0.718009\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-22 2024-11-29 0.718009\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-22 2024-11-29 0.718009\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-22 2024-11-29 0.718009\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-22 2024-11-29 0.718009\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-29 2024-12-06 0.948363\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-29 2024-12-06 0.948363\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-29 2024-12-06 0.948363\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-29 2024-12-06 0.948363\n",
" 开始日期 结束日期 准确率\n",
"0 2024-11-29 2024-12-06 0.948363\n",
" 开始日期 结束日期 准确率\n",
"0 2024-12-06 2024-12-13 0.947006\n"
]
}
],
"source": [
"# 定义一个函数来计算准确率\n",
"# 比较真实最高最低,和预测最高最低 计算准确率\n",
"def calculate_accuracy(row):\n",
" # 全子集情况:\n",
" if (row['max_price'] >= row['HIGH_PRICE'] and row['min_price'] <= row['LOW_PRICE']) or \\\n",
" (row['max_price'] <= row['HIGH_PRICE'] and row['min_price'] >= row['LOW_PRICE']):\n",
" return 1 \n",
" # 无交集情况:\n",
" if row['max_price'] < row['LOW_PRICE'] or \\\n",
" row['min_price'] > row['HIGH_PRICE']:\n",
" return 0\n",
" # 有交集情况:\n",
" else:\n",
" sorted_prices = sorted([row['LOW_PRICE'], row['min_price'], row['max_price'], row['HIGH_PRICE']])\n",
" middle_diff = sorted_prices[2] - sorted_prices[1]\n",
" price_range = row['HIGH_PRICE'] - row['LOW_PRICE']\n",
" accuracy = middle_diff / price_range\n",
" return accuracy\n",
"import datetime\n",
"weight_dict = [0.4,0.15,0.1,0.1,0.25] # 权重\n",
"\n",
"columns = ['HIGH_PRICE','LOW_PRICE','min_price','max_price']\n",
"df[columns] = df[columns].astype(float)\n",
"df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)\n",
"# df['ACCURACY'] = df.apply(is_within_range, axis=1)\n",
"# 取结束日期上一周的日期\n",
"def get_week_date(end_time):\n",
" endtime = end_time\n",
" endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d')\n",
" up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14)\n",
" up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)][4:-2]\n",
" up_week_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates]\n",
" return up_week_dates\n",
"\n",
"# 计算准确率并保存结果\n",
"def _get_accuracy_rate(df,up_week_dates,endtime):\n",
" df3 = df.copy()\n",
" df3 = df3[df3['CREAT_DATE'].isin(up_week_dates)]\n",
" df3 = df3[df3['ds'].isin(up_week_dates)]\n",
" accuracy_rote = 0\n",
" for i,group in df3.groupby('ds'):\n",
" # print('权重:',weight_dict[len(group)-1])\n",
" # print('准确率:',(group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1])\n",
" accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]\n",
" df3.to_csv(os.path.join(dataset,f'accuracy_{endtime}.csv'),index=False)\n",
" df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])\n",
" df4.loc[len(df4)] = {'开始日期':up_week_dates[0],'结束日期':up_week_dates[-1],'准确率':accuracy_rote}\n",
" df4.to_csv(os.path.join(dataset,f'accuracy_rote_{endtime}.csv'),index=False)\n",
" print(df4)\n",
" # df4.to_sql(\"accuracy_rote\", con=sqlitedb.connection, if_exists='append', index=False)\n",
"\n",
"\n",
"end_times = df['CREAT_DATE'].unique()\n",
"for endtime in end_times:\n",
" up_week_dates = get_week_date(endtime)\n",
" _get_accuracy_rate(df,up_week_dates,endtime)\n",
"\n",
"# 打印结果\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "0f942c69",
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'PREDICT_DATE'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3791\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3790\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 3791\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[0;32m 3792\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
"File \u001b[1;32mindex.pyx:152\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32mindex.pyx:181\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:7080\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:7088\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mKeyError\u001b[0m: 'PREDICT_DATE'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[12], line 4\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# ds 按周取\u001b[39;00m\n\u001b[0;32m 3\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDs_Week\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mds\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: x\u001b[38;5;241m.\u001b[39mstrftime(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mU\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m----> 4\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPre_Week\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPREDICT_DATE\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: x\u001b[38;5;241m.\u001b[39mstrftime(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mU\u001b[39m\u001b[38;5;124m'\u001b[39m))\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\pandas\\core\\frame.py:3893\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3891\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m 3892\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[1;32m-> 3893\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mget_loc(key)\n\u001b[0;32m 3894\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[0;32m 3895\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n",
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3798\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3793\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[0;32m 3794\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[0;32m 3795\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[0;32m 3796\u001b[0m ):\n\u001b[0;32m 3797\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[1;32m-> 3798\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[0;32m 3799\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m 3800\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[0;32m 3801\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[0;32m 3802\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[0;32m 3803\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
"\u001b[1;31mKeyError\u001b[0m: 'PREDICT_DATE'"
]
}
],
"source": [
"import datetime\n",
"# ds 按周取\n",
"df['Ds_Week'] = df['ds'].apply(lambda x: x.strftime('%U'))\n",
"df['Pre_Week'] = df['PREDICT_DATE'].apply(lambda x: x.strftime('%U'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7b05510",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ds</th>\n",
" <th>ACCURACY</th>\n",
" <th>PREDICT_DATE</th>\n",
" <th>CREAT_DATE</th>\n",
" <th>HIGH_PRICE_y</th>\n",
" <th>LOW_PRICE_y</th>\n",
" <th>MIN_PRICE</th>\n",
" <th>MAX_PRICE</th>\n",
" <th>Ds_Week</th>\n",
" <th>Pre_Week</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2024-11-26</td>\n",
" <td>1.000000</td>\n",
" <td>2024-11-26</td>\n",
" <td>2024-11-25</td>\n",
" <td>73.80</td>\n",
" <td>71.63</td>\n",
" <td>71.071556</td>\n",
" <td>76.006900</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2024-11-27</td>\n",
" <td>1.000000</td>\n",
" <td>2024-11-27</td>\n",
" <td>2024-11-25</td>\n",
" <td>72.85</td>\n",
" <td>71.71</td>\n",
" <td>71.003624</td>\n",
" <td>75.580560</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2024-11-28</td>\n",
" <td>0.789324</td>\n",
" <td>2024-11-28</td>\n",
" <td>2024-11-25</td>\n",
" <td>72.96</td>\n",
" <td>71.85</td>\n",
" <td>72.083850</td>\n",
" <td>76.204260</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2024-11-29</td>\n",
" <td>1.000000</td>\n",
" <td>2024-11-29</td>\n",
" <td>2024-11-25</td>\n",
" <td>73.34</td>\n",
" <td>71.75</td>\n",
" <td>71.329730</td>\n",
" <td>75.703950</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2024-12-02</td>\n",
" <td>0.853412</td>\n",
" <td>2024-12-02</td>\n",
" <td>2024-11-25</td>\n",
" <td>72.89</td>\n",
" <td>71.52</td>\n",
" <td>71.720825</td>\n",
" <td>76.264275</td>\n",
" <td>48</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>2024-11-25</td>\n",
" <td>0.118328</td>\n",
" <td>2024-11-25</td>\n",
" <td>2024-11-22</td>\n",
" <td>74.83</td>\n",
" <td>72.30</td>\n",
" <td>74.530630</td>\n",
" <td>76.673140</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>2024-11-26</td>\n",
" <td>0.000000</td>\n",
" <td>2024-11-26</td>\n",
" <td>2024-11-22</td>\n",
" <td>73.80</td>\n",
" <td>71.63</td>\n",
" <td>74.440430</td>\n",
" <td>76.874565</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>2024-11-27</td>\n",
" <td>0.000000</td>\n",
" <td>2024-11-27</td>\n",
" <td>2024-11-22</td>\n",
" <td>72.85</td>\n",
" <td>71.71</td>\n",
" <td>74.663180</td>\n",
" <td>76.734130</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>2024-11-28</td>\n",
" <td>0.000000</td>\n",
" <td>2024-11-28</td>\n",
" <td>2024-11-22</td>\n",
" <td>72.96</td>\n",
" <td>71.85</td>\n",
" <td>74.708410</td>\n",
" <td>77.141050</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>2024-11-29</td>\n",
" <td>0.000000</td>\n",
" <td>2024-11-29</td>\n",
" <td>2024-11-22</td>\n",
" <td>73.34</td>\n",
" <td>71.75</td>\n",
" <td>74.703210</td>\n",
" <td>77.746170</td>\n",
" <td>47</td>\n",
" <td>47</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>75 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" ds ACCURACY PREDICT_DATE CREAT_DATE HIGH_PRICE_y LOW_PRICE_y \\\n",
"0 2024-11-26 1.000000 2024-11-26 2024-11-25 73.80 71.63 \n",
"1 2024-11-27 1.000000 2024-11-27 2024-11-25 72.85 71.71 \n",
"2 2024-11-28 0.789324 2024-11-28 2024-11-25 72.96 71.85 \n",
"3 2024-11-29 1.000000 2024-11-29 2024-11-25 73.34 71.75 \n",
"4 2024-12-02 0.853412 2024-12-02 2024-11-25 72.89 71.52 \n",
".. ... ... ... ... ... ... \n",
"70 2024-11-25 0.118328 2024-11-25 2024-11-22 74.83 72.30 \n",
"71 2024-11-26 0.000000 2024-11-26 2024-11-22 73.80 71.63 \n",
"72 2024-11-27 0.000000 2024-11-27 2024-11-22 72.85 71.71 \n",
"73 2024-11-28 0.000000 2024-11-28 2024-11-22 72.96 71.85 \n",
"74 2024-11-29 0.000000 2024-11-29 2024-11-22 73.34 71.75 \n",
"\n",
" MIN_PRICE MAX_PRICE Ds_Week Pre_Week \n",
"0 71.071556 76.006900 47 47 \n",
"1 71.003624 75.580560 47 47 \n",
"2 72.083850 76.204260 47 47 \n",
"3 71.329730 75.703950 47 47 \n",
"4 71.720825 76.264275 48 48 \n",
".. ... ... ... ... \n",
"70 74.530630 76.673140 47 47 \n",
"71 74.440430 76.874565 47 47 \n",
"72 74.663180 76.734130 47 47 \n",
"73 74.708410 77.141050 47 47 \n",
"74 74.703210 77.746170 47 47 \n",
"\n",
"[75 rows x 10 columns]"
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1374e354",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['2024-11-22', '2024-11-23', '2024-11-24', '2024-11-25', '2024-11-26', '2024-11-27', '2024-11-28', '2024-11-29']\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(15, 10)\n"
]
}
],
"source": [
"# 取结束日期上一周的日期\n",
"endtime = '2024-12-3'\n",
"endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d')\n",
"up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14)\n",
"up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)][4:-2]\n",
"up_week_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates]\n",
"print(up_week_dates)\n",
"\n",
"\n",
"df3 = df.copy()\n",
"df3 = df3[df3['CREAT_DATE'].isin(up_week_dates)]\n",
"df3 = df3[df3['PREDICT_DATE'].isin(up_week_dates)]\n",
"print(df3.shape)\n",
"df3.to_csv('up_week_dates.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8aa47e90",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-11-25 00:00:00\n",
"权重: 0.07\n",
"准确率: 1.7749209486165771\n",
"2024-11-26 00:00:00\n",
"权重: 0.13\n",
"准确率: 7.5\n",
"2024-11-27 00:00:00\n",
"权重: 0.2\n",
"准确率: 8.034364035087705\n",
"2024-11-28 00:00:00\n",
"权重: 0.27\n",
"准确率: 9.718006756756724\n",
"2024-11-29 00:00:00\n",
"权重: 0.33\n",
"准确率: 10.824716981132076\n",
"37.85200872159308\n"
]
}
],
"source": [
"total = len(df3)\n",
"accuracy_rote = 0\n",
"# for i,group in df3.groupby('CREAT_DATE'):\n",
"for i,group in df3.groupby('ds'):\n",
" print(i)\n",
" print('权重:',round(len(group)/total,2))\n",
" print('准确率:',group['ACCURACY'].sum()/(len(group)/total))\n",
" accuracy_rote += group['ACCURACY'].sum()/(len(group)/total)\n",
"\n",
"print(accuracy_rote)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}