681 lines
31 KiB
Plaintext
681 lines
31 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "9daadf20-caa6-4b25-901c-6cc3ef563f58",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"(255, 28)\n",
|
||
"(78, 4)\n",
|
||
"(255, 31)\n",
|
||
" ds NHITS Informer LSTM iTransformer TSMixer \\\n",
|
||
"0 2024-10-08 79.76823 80.197660 79.802414 78.391460 80.200510 \n",
|
||
"1 2024-10-09 78.75903 80.235740 79.844154 78.888565 79.861180 \n",
|
||
"2 2024-10-10 79.43970 80.186230 79.885100 79.488700 79.483086 \n",
|
||
"3 2024-10-11 79.62268 80.502975 79.878560 79.406670 79.313965 \n",
|
||
"4 2024-10-14 79.91698 80.931946 79.936270 79.758575 79.197430 \n",
|
||
"\n",
|
||
" TSMixerx PatchTST RNN GRU ... y \\\n",
|
||
"0 79.243256 80.883450 80.836105 81.276060 ... 77.180000 \n",
|
||
"1 78.068150 80.950096 80.917860 81.303505 ... 76.580002 \n",
|
||
"2 77.189064 80.347400 80.866040 81.798050 ... 79.400002 \n",
|
||
"3 77.840096 80.545296 81.167710 81.552810 ... 79.040001 \n",
|
||
"4 77.904300 81.432976 81.144210 81.483215 ... 77.459999 \n",
|
||
"\n",
|
||
" min_within_quantile max_within_quantile id CREAT_DATE min_price \\\n",
|
||
"0 80.200510 81.163630 51 2024-10-07 79.848624 \n",
|
||
"1 79.861180 81.757850 52 2024-10-07 79.981211 \n",
|
||
"2 79.483086 81.190400 53 2024-10-07 79.398409 \n",
|
||
"3 79.313965 81.371100 54 2024-10-07 79.394607 \n",
|
||
"4 79.197430 81.432976 55 2024-10-07 79.351007 \n",
|
||
"\n",
|
||
" max_price 序号 LOW_PRICE HIGH_PRICE \n",
|
||
"0 81.848624 52.0 76.36 81.14 \n",
|
||
"1 81.981211 51.0 75.15 78.02 \n",
|
||
"2 81.398409 50.0 76.72 79.72 \n",
|
||
"3 81.394607 49.0 78.04 79.50 \n",
|
||
"4 81.351007 48.0 74.86 78.55 \n",
|
||
"\n",
|
||
"[5 rows x 31 columns]\n",
|
||
" ds NHITS Informer LSTM iTransformer TSMixer \\\n",
|
||
"250 2024-12-16 74.268654 73.333750 73.090164 74.007034 74.36094 \n",
|
||
"251 2024-12-17 74.724630 73.373810 73.242540 74.285530 74.24597 \n",
|
||
"252 2024-12-18 74.948860 73.505330 73.400400 74.260290 74.06419 \n",
|
||
"253 2024-12-19 74.396740 73.934380 73.764320 74.429800 74.18059 \n",
|
||
"254 2024-12-20 73.882930 73.700935 73.769050 73.977585 73.97370 \n",
|
||
"\n",
|
||
" TSMixerx PatchTST RNN GRU ... y min_within_quantile \\\n",
|
||
"250 74.67781 74.475680 75.63023 74.853800 ... NaN 74.157196 \n",
|
||
"251 74.46460 74.628000 75.22519 74.957530 ... NaN 73.711680 \n",
|
||
"252 74.32628 74.656815 75.49716 74.890236 ... NaN 74.064190 \n",
|
||
"253 74.41026 74.698875 75.87007 75.118866 ... NaN 74.148070 \n",
|
||
"254 74.49235 74.345410 75.88466 75.186325 ... NaN 73.816990 \n",
|
||
"\n",
|
||
" max_within_quantile id CREAT_DATE min_price max_price 序号 \\\n",
|
||
"250 74.576454 301 2024-12-16 73.416857 75.416857 3.0 \n",
|
||
"251 74.948060 302 2024-12-16 73.434301 75.434301 2.0 \n",
|
||
"252 75.200980 303 2024-12-16 73.707471 75.707471 1.0 \n",
|
||
"253 75.395440 304 2024-12-16 73.639791 75.639791 NaN \n",
|
||
"254 74.345410 305 2024-12-16 73.067399 75.067399 NaN \n",
|
||
"\n",
|
||
" LOW_PRICE HIGH_PRICE \n",
|
||
"250 72.53 73.28 \n",
|
||
"251 72.48 74.18 \n",
|
||
"252 72.80 74.20 \n",
|
||
"253 NaN NaN \n",
|
||
"254 NaN NaN \n",
|
||
"\n",
|
||
"[5 rows x 31 columns]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import sqlite3\n",
"import os\n",
"import time\n",
"\n",
"import pandas as pd\n",
"\n",
"# Directory that holds the input files and receives the exported CSV.\n",
"dataset = r'yuanyoudataset'\n",
"\n",
"# dataset = r'C:\\Users\\Administrator\\Desktop' \n",
"\n",
"# Legacy loader, kept for reference: read the predictions from the `accuracy` table of the SQLite database.\n",
"# dbfilename = os.path.join(r'D:\\code\\PriceForecast\\yuanyoudataset','jbsh_yuanyou.db')\n",
"# conn = sqlite3.connect(dbfilename)\n",
"# query = 'SELECT * FROM accuracy'\n",
"# df1 = pd.read_sql_query(query, conn)\n",
"# df1['ds'] = df1['PREDICT_DATE']\n",
"# conn.close()\n",
"# print(df1.shape)\n",
"\n",
"# Predicted price data.\n",
"dfcsvfilename = os.path.join(dataset,'accuracy_five_mean.csv')\n",
"df1 = pd.read_csv(dfcsvfilename)\n",
"print(df1.shape)\n",
"\n",
"# Actual lowest / highest prices; the first five rows of the sheet are dropped before renaming.\n",
"xlsfilename = os.path.join(dataset,'数据项下载.xls')\n",
"df2 = pd.read_excel(xlsfilename)[5:]\n",
"df2 = df2.rename(columns = {'数据项名称':'ds','布伦特最低价':'LOW_PRICE','布伦特最高价':'HIGH_PRICE'})\n",
"print(df2.shape)\n",
"\n",
"# Left join on `ds`, so every prediction row is kept even when no actual price matches.\n",
"df = pd.merge(df1,df2,on=['ds'],how='left')\n",
"\n",
"df['ds'] = pd.to_datetime(df['ds'])\n",
"# df['PREDICT_DATE'] = pd.to_datetime(df['PREDICT_DATE'])\n",
"# `df.reindex()` with no arguments left the frame unchanged; renumber the rows explicitly instead.\n",
"df = df.reset_index(drop=True)\n",
"\n",
"print(df.shape)\n",
"# from datetime import datetime\n",
"# Snapshot of the merged frame; the timestamp in the name gives every run its own file.\n",
"df.to_csv(os.path.join(dataset,f'预测数据-{time.time()}.csv'))\n",
"# df = df[['ds','min_within_quantile','max_within_quantile']]\n",
"\n",
"# Show the first and last rows of the merged frame.\n",
"print(df.head())\n",
"print(df.tail())\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "0d77ab7d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Predicted band: mean of the five columns at positions 1-5 (per the original note, the five\n",
"# best models from model evaluation), widened by a fixed margin on each side.\n",
"PRICE_BAND_HALF_WIDTH = 1.5\n",
"top5_mean = df.iloc[:, 1:6].mean(axis=1)\n",
"df['min_price'] = top5_mean - PRICE_BAND_HALF_WIDTH\n",
"df['max_price'] = top5_mean + PRICE_BAND_HALF_WIDTH"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "e51c3fd0-6bff-45de-b8b6-971e7986c7a7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-09-27 2024-10-04 0\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-09-27 2024-10-04 0\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-09-27 2024-10-04 0\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-09-27 2024-10-04 0\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-09-27 2024-10-04 0\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-04 2024-10-11 0.495046\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-04 2024-10-11 0.495046\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-04 2024-10-11 0.495046\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-04 2024-10-11 0.495046\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-04 2024-10-11 0.495046\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-11 2024-10-18 0.449368\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-11 2024-10-18 0.449368\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-11 2024-10-18 0.449368\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-11 2024-10-18 0.449368\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-11 2024-10-18 0.449368\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-18 2024-10-25 0.814057\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-18 2024-10-25 0.814057\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-18 2024-10-25 0.814057\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-18 2024-10-25 0.814057\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-18 2024-10-25 0.814057\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-25 2024-11-01 0.433599\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-25 2024-11-01 0.433599\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-25 2024-11-01 0.433599\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-25 2024-11-01 0.433599\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-10-25 2024-11-01 0.433599\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-01 2024-11-08 0.894767\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-01 2024-11-08 0.894767\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-01 2024-11-08 0.894767\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-01 2024-11-08 0.894767\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-01 2024-11-08 0.894767\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-08 2024-11-15 0.915721\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-08 2024-11-15 0.915721\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-08 2024-11-15 0.915721\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-08 2024-11-15 0.915721\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-08 2024-11-15 0.915721\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-15 2024-11-22 0.835755\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-15 2024-11-22 0.835755\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-15 2024-11-22 0.835755\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-15 2024-11-22 0.835755\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-15 2024-11-22 0.835755\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-22 2024-11-29 0.718009\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-22 2024-11-29 0.718009\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-22 2024-11-29 0.718009\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-22 2024-11-29 0.718009\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-22 2024-11-29 0.718009\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-29 2024-12-06 0.948363\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-29 2024-12-06 0.948363\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-29 2024-12-06 0.948363\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-29 2024-12-06 0.948363\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-11-29 2024-12-06 0.948363\n",
|
||
" 开始日期 结束日期 准确率\n",
|
||
"0 2024-12-06 2024-12-13 0.947006\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Compare the actual high/low prices with the predicted max/min prices and score the match.\n",
"def calculate_accuracy(row):\n",
"    \"\"\"Score how well the predicted price band matches the actual price range.\n",
"\n",
"    `row` must support lookup of 'min_price', 'max_price', 'LOW_PRICE' and 'HIGH_PRICE'.\n",
"\n",
"    Returns 1 when either interval fully contains the other, 0 when the two\n",
"    intervals do not intersect, and otherwise the distance between the two middle\n",
"    values of the four sorted bounds divided by (HIGH_PRICE - LOW_PRICE).\n",
"    \"\"\"\n",
"    pred_low, pred_high = row['min_price'], row['max_price']\n",
"    real_low, real_high = row['LOW_PRICE'], row['HIGH_PRICE']\n",
"\n",
"    # Full containment, in either direction.\n",
"    pred_covers_real = pred_high >= real_high and pred_low <= real_low\n",
"    real_covers_pred = pred_high <= real_high and pred_low >= real_low\n",
"    if pred_covers_real or real_covers_pred:\n",
"        return 1\n",
"\n",
"    # No intersection at all.\n",
"    if pred_high < real_low or pred_low > real_high:\n",
"        return 0\n",
"\n",
"    # Partial overlap: the two middle values of the sorted bounds delimit the shared segment.\n",
"    ordered = sorted([real_low, pred_low, pred_high, real_high])\n",
"    return (ordered[2] - ordered[1]) / (real_high - real_low)\n",
|
||
"import datetime\n",
"\n",
"# Weights used by _get_accuracy_rate; the entry is picked by (rows per target date - 1).\n",
"weight_dict = [0.4, 0.15, 0.1, 0.1, 0.25]\n",
"\n",
"# Cast the four price columns to float, then score every row.\n",
"columns = ['HIGH_PRICE', 'LOW_PRICE', 'min_price', 'max_price']\n",
"df[columns] = df[columns].astype(float)\n",
"df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)\n",
"# df['ACCURACY'] = df.apply(is_within_range, axis=1)\n",
|
||
"# Dates of the look-back window that precedes the week containing end_time.\n",
"def get_week_date(end_time):\n",
"    \"\"\"Return eight consecutive dates as 'YYYY-MM-DD' strings.\n",
"\n",
"    `end_time` is a date string parsed with '%Y-%m-%d'. The window starts on the\n",
"    Friday two weeks before the week containing `end_time` and ends on the Friday\n",
"    of the week before it.\n",
"    \"\"\"\n",
"    anchor = datetime.datetime.strptime(end_time, '%Y-%m-%d')\n",
"    # Monday of the week two weeks before the one containing end_time.\n",
"    first_monday = anchor - datetime.timedelta(days=anchor.weekday() + 14)\n",
"    # Offsets 4..11 from that Monday: Friday through the following Friday.\n",
"    return [\n",
"        (first_monday + datetime.timedelta(days=offset)).strftime('%Y-%m-%d')\n",
"        for offset in range(4, 12)\n",
"    ]\n",
|
||
"\n",
|
||
"# Compute the weighted accuracy of one look-back window and save the results.\n",
"def _get_accuracy_rate(df,up_week_dates,endtime):\n",
"    \"\"\"Score the forecasts that fall inside `up_week_dates` and write two CSV files.\n",
"\n",
"    Rows are kept when both CREAT_DATE and ds are among `up_week_dates`. For each\n",
"    distinct ds, the group's ACCURACY sum divided by its row count is multiplied by\n",
"    weight_dict[row count - 1], and the products are added up.\n",
"\n",
"    Writes accuracy_<endtime>.csv (the selected rows) and accuracy_rote_<endtime>.csv\n",
"    (one summary row) into `dataset`, then prints the summary frame.\n",
"    \"\"\"\n",
"    window = df.copy()\n",
"    in_window = window['CREAT_DATE'].isin(up_week_dates) & window['ds'].isin(up_week_dates)\n",
"    window = window[in_window]\n",
"\n",
"    accuracy_rote = 0\n",
"    for _, same_day in window.groupby('ds'):\n",
"        n_rows = len(same_day)\n",
"        accuracy_rote += (same_day['ACCURACY'].sum()/n_rows)*weight_dict[n_rows-1]\n",
"    window.to_csv(os.path.join(dataset,f'accuracy_{endtime}.csv'),index=False)\n",
"\n",
"    summary = pd.DataFrame(columns=['开始日期','结束日期','准确率'])\n",
"    summary.loc[len(summary)] = {'开始日期':up_week_dates[0],'结束日期':up_week_dates[-1],'准确率':accuracy_rote}\n",
"    summary.to_csv(os.path.join(dataset,f'accuracy_rote_{endtime}.csv'),index=False)\n",
"    print(summary)\n",
"    # df4.to_sql(\"accuracy_rote\", con=sqlitedb.connection, if_exists='append', index=False)\n",
|
||
"\n",
|
||
"\n",
|
||
"# Score the look-back window of every distinct forecast creation date.\n",
"end_times = df['CREAT_DATE'].unique()\n",
"for endtime in end_times:\n",
"    up_week_dates = get_week_date(endtime)\n",
"    _get_accuracy_rate(df, up_week_dates, endtime)\n",
|
||
"\n",
|
||
"# 打印结果\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "0f942c69",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "KeyError",
|
||
"evalue": "'PREDICT_DATE'",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3791\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3790\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 3791\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[0;32m 3792\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
|
||
"File \u001b[1;32mindex.pyx:152\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
|
||
"File \u001b[1;32mindex.pyx:181\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
|
||
"File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:7080\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
|
||
"File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:7088\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
|
||
"\u001b[1;31mKeyError\u001b[0m: 'PREDICT_DATE'",
|
||
"\nThe above exception was the direct cause of the following exception:\n",
|
||
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||
"Cell \u001b[1;32mIn[12], line 4\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# ds 按周取\u001b[39;00m\n\u001b[0;32m 3\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDs_Week\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mds\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: x\u001b[38;5;241m.\u001b[39mstrftime(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mU\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m----> 4\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPre_Week\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPREDICT_DATE\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: x\u001b[38;5;241m.\u001b[39mstrftime(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mU\u001b[39m\u001b[38;5;124m'\u001b[39m))\n",
|
||
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\pandas\\core\\frame.py:3893\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3891\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m 3892\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[1;32m-> 3893\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mget_loc(key)\n\u001b[0;32m 3894\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[0;32m 3895\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n",
|
||
"File \u001b[1;32md:\\ProgramData\\anaconda3\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3798\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3793\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[0;32m 3794\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[0;32m 3795\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[0;32m 3796\u001b[0m ):\n\u001b[0;32m 3797\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[1;32m-> 3798\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[0;32m 3799\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m 3800\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[0;32m 3801\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[0;32m 3802\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[0;32m 3803\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
|
||
"\u001b[1;31mKeyError\u001b[0m: 'PREDICT_DATE'"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import datetime\n",
"\n",
"# Week-of-year label (strftime '%U') of each target date.\n",
"df['Ds_Week'] = pd.to_datetime(df['ds']).dt.strftime('%U')\n",
"# The CSV-based load provides no PREDICT_DATE column (the legacy DB loader copied it into `ds`),\n",
"# so use `ds` when PREDICT_DATE is absent instead of raising KeyError.\n",
"predict_source = 'PREDICT_DATE' if 'PREDICT_DATE' in df.columns else 'ds'\n",
"df['Pre_Week'] = pd.to_datetime(df[predict_source]).dt.strftime('%U')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "a7b05510",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>ds</th>\n",
|
||
" <th>ACCURACY</th>\n",
|
||
" <th>PREDICT_DATE</th>\n",
|
||
" <th>CREAT_DATE</th>\n",
|
||
" <th>HIGH_PRICE_y</th>\n",
|
||
" <th>LOW_PRICE_y</th>\n",
|
||
" <th>MIN_PRICE</th>\n",
|
||
" <th>MAX_PRICE</th>\n",
|
||
" <th>Ds_Week</th>\n",
|
||
" <th>Pre_Week</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2024-11-26</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>2024-11-26</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>73.80</td>\n",
|
||
" <td>71.63</td>\n",
|
||
" <td>71.071556</td>\n",
|
||
" <td>76.006900</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2024-11-27</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>2024-11-27</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>72.85</td>\n",
|
||
" <td>71.71</td>\n",
|
||
" <td>71.003624</td>\n",
|
||
" <td>75.580560</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2024-11-28</td>\n",
|
||
" <td>0.789324</td>\n",
|
||
" <td>2024-11-28</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>72.96</td>\n",
|
||
" <td>71.85</td>\n",
|
||
" <td>72.083850</td>\n",
|
||
" <td>76.204260</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2024-11-29</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>2024-11-29</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>73.34</td>\n",
|
||
" <td>71.75</td>\n",
|
||
" <td>71.329730</td>\n",
|
||
" <td>75.703950</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2024-12-02</td>\n",
|
||
" <td>0.853412</td>\n",
|
||
" <td>2024-12-02</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>72.89</td>\n",
|
||
" <td>71.52</td>\n",
|
||
" <td>71.720825</td>\n",
|
||
" <td>76.264275</td>\n",
|
||
" <td>48</td>\n",
|
||
" <td>48</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>70</th>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>0.118328</td>\n",
|
||
" <td>2024-11-25</td>\n",
|
||
" <td>2024-11-22</td>\n",
|
||
" <td>74.83</td>\n",
|
||
" <td>72.30</td>\n",
|
||
" <td>74.530630</td>\n",
|
||
" <td>76.673140</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>71</th>\n",
|
||
" <td>2024-11-26</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2024-11-26</td>\n",
|
||
" <td>2024-11-22</td>\n",
|
||
" <td>73.80</td>\n",
|
||
" <td>71.63</td>\n",
|
||
" <td>74.440430</td>\n",
|
||
" <td>76.874565</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>72</th>\n",
|
||
" <td>2024-11-27</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2024-11-27</td>\n",
|
||
" <td>2024-11-22</td>\n",
|
||
" <td>72.85</td>\n",
|
||
" <td>71.71</td>\n",
|
||
" <td>74.663180</td>\n",
|
||
" <td>76.734130</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>73</th>\n",
|
||
" <td>2024-11-28</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2024-11-28</td>\n",
|
||
" <td>2024-11-22</td>\n",
|
||
" <td>72.96</td>\n",
|
||
" <td>71.85</td>\n",
|
||
" <td>74.708410</td>\n",
|
||
" <td>77.141050</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>74</th>\n",
|
||
" <td>2024-11-29</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2024-11-29</td>\n",
|
||
" <td>2024-11-22</td>\n",
|
||
" <td>73.34</td>\n",
|
||
" <td>71.75</td>\n",
|
||
" <td>74.703210</td>\n",
|
||
" <td>77.746170</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>75 rows × 10 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" ds ACCURACY PREDICT_DATE CREAT_DATE HIGH_PRICE_y LOW_PRICE_y \\\n",
|
||
"0 2024-11-26 1.000000 2024-11-26 2024-11-25 73.80 71.63 \n",
|
||
"1 2024-11-27 1.000000 2024-11-27 2024-11-25 72.85 71.71 \n",
|
||
"2 2024-11-28 0.789324 2024-11-28 2024-11-25 72.96 71.85 \n",
|
||
"3 2024-11-29 1.000000 2024-11-29 2024-11-25 73.34 71.75 \n",
|
||
"4 2024-12-02 0.853412 2024-12-02 2024-11-25 72.89 71.52 \n",
|
||
".. ... ... ... ... ... ... \n",
|
||
"70 2024-11-25 0.118328 2024-11-25 2024-11-22 74.83 72.30 \n",
|
||
"71 2024-11-26 0.000000 2024-11-26 2024-11-22 73.80 71.63 \n",
|
||
"72 2024-11-27 0.000000 2024-11-27 2024-11-22 72.85 71.71 \n",
|
||
"73 2024-11-28 0.000000 2024-11-28 2024-11-22 72.96 71.85 \n",
|
||
"74 2024-11-29 0.000000 2024-11-29 2024-11-22 73.34 71.75 \n",
|
||
"\n",
|
||
" MIN_PRICE MAX_PRICE Ds_Week Pre_Week \n",
|
||
"0 71.071556 76.006900 47 47 \n",
|
||
"1 71.003624 75.580560 47 47 \n",
|
||
"2 72.083850 76.204260 47 47 \n",
|
||
"3 71.329730 75.703950 47 47 \n",
|
||
"4 71.720825 76.264275 48 48 \n",
|
||
".. ... ... ... ... \n",
|
||
"70 74.530630 76.673140 47 47 \n",
|
||
"71 74.440430 76.874565 47 47 \n",
|
||
"72 74.663180 76.734130 47 47 \n",
|
||
"73 74.708410 77.141050 47 47 \n",
|
||
"74 74.703210 77.746170 47 47 \n",
|
||
"\n",
|
||
"[75 rows x 10 columns]"
|
||
]
|
||
},
|
||
"execution_count": 139,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "1374e354",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"['2024-11-22', '2024-11-23', '2024-11-24', '2024-11-25', '2024-11-26', '2024-11-27', '2024-11-28', '2024-11-29']\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"(15, 10)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Look-back window for a fixed end date, built with the shared get_week_date helper\n",
"# instead of repeating its date arithmetic inline.\n",
"endtime = '2024-12-3'\n",
"up_week_dates = get_week_date(endtime)\n",
"print(up_week_dates)\n",
"\n",
"\n",
"# Keep forecasts that were created inside the window and also target a date inside it.\n",
"# The target date is read from `ds`, as _get_accuracy_rate does; the frame built from the CSV\n",
"# has no PREDICT_DATE column.\n",
"df3 = df.copy()\n",
"df3 = df3[df3['CREAT_DATE'].isin(up_week_dates)]\n",
"df3 = df3[df3['ds'].isin(up_week_dates)]\n",
"print(df3.shape)\n",
"df3.to_csv('up_week_dates.csv',index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "8aa47e90",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2024-11-25 00:00:00\n",
|
||
"权重: 0.07\n",
|
||
"准确率: 1.7749209486165771\n",
|
||
"2024-11-26 00:00:00\n",
|
||
"权重: 0.13\n",
|
||
"准确率: 7.5\n",
|
||
"2024-11-27 00:00:00\n",
|
||
"权重: 0.2\n",
|
||
"准确率: 8.034364035087705\n",
|
||
"2024-11-28 00:00:00\n",
|
||
"权重: 0.27\n",
|
||
"准确率: 9.718006756756724\n",
|
||
"2024-11-29 00:00:00\n",
|
||
"权重: 0.33\n",
|
||
"准确率: 10.824716981132076\n",
|
||
"37.85200872159308\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"total = len(df3)\n",
"accuracy_rote = 0\n",
"# for i,group in df3.groupby('CREAT_DATE'):\n",
"for i,group in df3.groupby('ds'):\n",
"    # Weight of a target date = its share of the selected rows.\n",
"    weight = len(group)/total\n",
"    # Mean accuracy of the date scaled by its weight. The previous version divided the sum by\n",
"    # the weight, which produced values above 1.\n",
"    weighted_accuracy = (group['ACCURACY'].sum()/len(group))*weight\n",
"    print(i)\n",
"    print('权重:',round(weight,2))\n",
"    print('准确率:',weighted_accuracy)\n",
"    accuracy_rote += weighted_accuracy\n",
"\n",
"print(accuracy_rote)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.7"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|