255 lines
9.3 KiB
Python
255 lines
9.3 KiB
Python
import numpy as np
|
||
from sklearn.preprocessing import MinMaxScaler
|
||
from sklearn.metrics import mean_squared_error
|
||
from sklearn.model_selection import train_test_split
|
||
from tensorflow.keras.models import Sequential
|
||
from tensorflow.keras.layers import LSTM, Dense
|
||
import datetime
|
||
import matplotlib.pyplot as plt
|
||
import pandas as pd
|
||
import os
|
||
import random
|
||
import string
|
||
import time
|
||
import base64
|
||
from hashlib import sha256
|
||
from hmac import HMAC
|
||
import requests
|
||
import csv
|
||
from numpy import concatenate
|
||
from math import sqrt
|
||
|
||
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a supervised-learning table.

    Every output row holds ``n_in`` lagged copies of each variable
    (``t-n_in`` ... ``t-1``) followed by ``n_out`` forecast copies
    (``t`` ... ``t+n_out-1``), laid out side by side.

    :param data: observations, one row per time step; anything
        ``pandas.DataFrame`` accepts (flat list, nested list, 1-D or 2-D
        array, Series, DataFrame).
    :param n_in: number of lag steps used as input, default 1.
    :param n_out: number of forecast steps used as output, default 1.
    :param dropnan: when true, drop every row that contains a NaN (the rows
        emptied by shifting as well as rows that were already incomplete).
    :return: DataFrame whose columns are named ``var<j>(t-<i>)``,
        ``var<j>(t)`` and ``var<j>(t+<i>)``.
    """
    df = pd.DataFrame(data)
    # Count the variables on the frame itself so that flat lists, nested
    # lists and 1-D arrays all get as many column names as they have columns.
    n_vars = df.shape[1]
    cols, names = [], []

    # Input sequence (t-n_in, ..., t-1): shifting down by i rows lines up the
    # value observed i steps earlier with the current row.
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [f'var{j + 1}(t-{i})' for j in range(n_vars)]

    # Forecast sequence (t, t+1, ..., t+n_out-1): shifting up by i rows lines
    # up the value observed i steps later; i == 0 is the current step.
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [f'var{j + 1}(t)' for j in range(n_vars)]
        else:
            names += [f'var{j + 1}(t+{i})' for j in range(n_vars)]

    # Put the shifted frames side by side and label the columns.
    agg = pd.concat(cols, axis=1)
    agg.columns = names

    # Shifting leaves NaN at the edges; remove the incomplete rows on request.
    if dropnan:
        agg.dropna(inplace=True)
    return agg
|
||
|
||
def createXY(dataset, n_past):
    """Cut a 2-D array into sliding windows and next-step targets.

    For every row index ``end`` from ``n_past`` up to the last row, the
    sample is the ``n_past`` rows preceding ``end`` (all columns) and the
    target is column 0 of row ``end``. The column count is printed once.

    :param dataset: 2-D array indexed as ``dataset[rows, cols]``.
    :param n_past: number of consecutive rows in each sample window.
    :return: tuple ``(samples, targets)`` of NumPy arrays.
    """
    n_cols = dataset.shape[1]
    print(n_cols)

    window_ends = range(n_past, len(dataset))
    samples = [dataset[end - n_past:end, 0:n_cols] for end in window_ends]
    targets = [dataset[end, 0] for end in window_ends]
    return np.array(samples), np.array(targets)
|
||
|
||
def ex_Lstm_M(df, n_days=14, out_days=7, is_debug=False, datasetpath=''):
    """Train a multi-feature LSTM on ``df`` and save a forecast chart.

    The frame is copied and indexed by its ``ds`` column; the remaining
    columns are min-max scaled to (0, 1) and reframed by
    ``series_to_supervised`` into rows of ``n_days`` input steps and
    ``out_days`` output steps. A ``LSTM(50) -> Dense(1)`` network (MAE loss,
    adam, 100 epochs, batch size 72, no shuffling) learns the first column's
    value at the last output step; the test split doubles as validation data.
    The function prints the array shapes, the inverse-scaled predictions and
    the test RMSE, then writes ``<this module's base name>_M.png`` into
    ``datasetpath``.

    :param df: DataFrame with a ``ds`` column plus feature columns (assumed
        numeric); the first feature column is the prediction target. The
        caller's frame is not modified.
    :param n_days: number of past rows fed to the network per sample.
    :param out_days: number of forecast steps in the supervised frame; also
        the number of trailing predictions listed in the chart's table.
    :param is_debug: accepted for interface compatibility; has no effect.
    :param datasetpath: directory the PNG is written to.
    :raises ValueError: if ``df`` has too few rows to produce at least one
        training window and one test window.
    :return: None
    """
    dataset = df.copy()
    dataset.set_index('ds', inplace=True)
    values = dataset.values
    n_features = dataset.shape[1]

    # Fail early with a clear message instead of deep inside Keras: reframing
    # consumes n_days + out_days - 1 rows, and the split needs two windows.
    n_windows = len(dataset) - n_days - out_days + 1
    if n_windows < 2:
        raise ValueError(
            'ex_Lstm_M needs at least %d rows for n_days=%d and out_days=%d, '
            'got %d' % (n_days + out_days + 1, n_days, out_days, len(dataset))
        )

    # Scale every feature into (0, 1).
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)

    # Build the n_days -> out_days supervised frame.
    reframed = series_to_supervised(scaled, n_days, out_days)
    values = reframed.values

    # 80% train / 20% test, measured on the supervised rows (not on the raw
    # row count, which is larger and would shrink or empty the test split).
    n_train = int(len(values) * 0.8)
    if n_train == 0:
        # Can still happen when NaN rows were dropped during reframing.
        raise ValueError('not enough complete rows left after reframing')
    train = values[:n_train, :]
    test = values[n_train:, :]

    # The first n_obs columns are the inputs; column -n_features is the first
    # variable at the last forecast step and serves as the target.
    n_obs = n_days * n_features
    train_X, train_y = train[:, :n_obs], train[:, -n_features]
    test_X, test_y = test[:, :n_obs], test[:, -n_features]

    # Reshape the inputs to 3-D: [samples, timesteps, features].
    train_X = train_X.reshape((train_X.shape[0], n_days, n_features))
    test_X = test_X.reshape((test_X.shape[0], n_days, n_features))
    print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

    # Design the network.
    model = Sequential()
    model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')

    # Fit the network.
    model.fit(train_X, train_y, epochs=100, batch_size=72,
              validation_data=(test_X, test_y), verbose=2, shuffle=False)

    # Predict on the test windows.
    yhat = model.predict(test_X)

    # The scaler expects n_features columns, so pad the single target column
    # with features 2..n of the last input step. The start index is written
    # out explicitly because `-n_features + 1` is 0 for a single feature and
    # would select every column instead of none.
    test_X = test_X.reshape((test_X.shape[0], n_obs))
    padding = test_X[:, n_obs - n_features + 1:]

    # Undo the scaling; only column 0 (the target) is kept.
    inv_yhat = scaler.inverse_transform(concatenate((yhat, padding), axis=1))
    inv_yhat = inv_yhat[:, 0]
    print(inv_yhat)

    test_y = test_y.reshape((len(test_y), 1))
    inv_y = scaler.inverse_transform(concatenate((test_y, padding), axis=1))
    inv_y = inv_y[:, 0]

    # Compute the RMSE in original units.
    rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
    print('Test RMSE: %.3f' % rmse)

    # Visualise: the actual series without its first out_days values against
    # every prediction. Slicing from the front stays correct when there are
    # no more than out_days test rows (a negative-index slice would not).
    history = inv_y[out_days:]
    time_axis_data = np.arange(len(history))
    time_axis_future = np.arange(len(inv_yhat))

    fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [5, 4]})
    try:
        fig.set_size_inches(6, 6)

        # First subplot: actual and predicted prices.
        ax[0].plot(time_axis_data, history, label='历史价格')
        ax[0].plot(time_axis_future, inv_yhat, linestyle='dashed', label='预测价格')
        ax[0].set_xlabel('时间')
        ax[0].set_ylabel('价格')
        ax[0].legend()
        ax[0].set_title('布伦特_多价格预测')
        # Fixed y-axis range carried over from the original implementation.
        ax[0].set_ylim(50, 120)

        # Second subplot: table of the last out_days predictions.
        ax[1].axis('off')
        table_data = [[f"Day {i + 1}", "{:.2f}".format(val)]
                      for i, val in enumerate(inv_yhat[-out_days:])]
        table = ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')
        table.auto_set_font_size(True)

        # Save next to the other outputs, named after this module.
        filename = os.path.basename(__file__).split('.')[0]
        fig.savefig(os.path.join(datasetpath, filename + '_M.png'))
    finally:
        # pyplot keeps figures alive until closed; release it so repeated
        # calls do not accumulate open figures.
        plt.close(fig)
|
||
|
||
|
||
def ex_Lstm(df, input_seq_len=50, output_seq_len=7, is_debug=False, dataset=''):
    """Train a single-series LSTM on ``df`` and save a forecast chart.

    The ``ds`` column is dropped, the remaining values are converted to
    floats, NaNs are removed and the result is treated as one series. Windows
    of ``input_seq_len`` values are paired with the ``output_seq_len`` values
    that follow them, split 80/20 with ``train_test_split`` and min-max
    scaled. A ``LSTM(128, relu) -> Dense(output_seq_len)`` network (MSE loss,
    adam, 100 epochs, batch size 64) is fitted, evaluated on the test split
    and used to predict the steps after the end of the series. The function
    prints the test MSE and the predictions and writes ``lstmmodels.png``
    into ``dataset``.

    :param df: DataFrame with a ``ds`` column and the value column(s). The
        caller's frame is not modified.
    :param input_seq_len: number of past values fed to the network.
    :param output_seq_len: number of future values predicted.
    :param is_debug: when true, only the last 300 values are used.
    :param dataset: directory the PNG is written to.
    :raises ValueError: if the series is too short to build at least one
        training window and one test window.
    :return: None
    """
    # Work on a derived array so the caller's DataFrame stays untouched.
    # NOTE(review): the boolean mask flattens the array, so a frame with
    # several value columns ends up as one interleaved series - confirm that
    # callers only pass a single value column.
    prices = np.array(df.drop('ds', axis=1), dtype=float)
    prices = prices[~np.isnan(prices)]
    if is_debug:
        prices = prices[-300:]

    # train_test_split cannot split fewer than two samples; report the real
    # cause here instead of failing inside scikit-learn.
    n_windows = len(prices) - input_seq_len - output_seq_len + 1
    if n_windows < 2:
        raise ValueError(
            'ex_Lstm needs at least %d values for input_seq_len=%d and '
            'output_seq_len=%d, got %d'
            % (input_seq_len + output_seq_len + 1, input_seq_len,
               output_seq_len, len(prices))
        )

    # Input windows and the output windows that immediately follow them.
    inputs = np.array([prices[i:i + input_seq_len] for i in range(n_windows)])
    outputs = np.array([
        prices[i + input_seq_len:i + input_seq_len + output_seq_len]
        for i in range(n_windows)
    ])

    # Split into training and testing sets. No shuffle argument is passed, so
    # scikit-learn's default applies (presumably a random split - see docs).
    X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=0.2)

    # Normalise; the scalers are fitted on the training windows only, each
    # window position being scaled as its own feature.
    scaler_in = MinMaxScaler()
    X_train = scaler_in.fit_transform(X_train)
    X_test = scaler_in.transform(X_test)

    scaler_out = MinMaxScaler()
    y_train = scaler_out.fit_transform(y_train)
    y_test = scaler_out.transform(y_test)

    # Define the LSTM model.
    model = Sequential()
    model.add(LSTM(128, activation='relu', input_shape=(input_seq_len, 1)))
    model.add(Dense(output_seq_len))
    model.compile(optimizer='adam', loss='mse')

    # Train and evaluate; Keras expects [samples, timesteps, features].
    X_train_seq = X_train.reshape(-1, input_seq_len, 1)
    X_test_seq = X_test.reshape(-1, input_seq_len, 1)
    model.fit(X_train_seq, y_train, epochs=100, batch_size=64,
              validation_data=(X_test_seq, y_test))
    mse = model.evaluate(X_test_seq, y_test)

    # Predict the steps that follow the last observed window.
    future_inputs = np.array([prices[-input_seq_len:]])
    future_inputs = scaler_in.transform(future_inputs)
    future_predictions = model.predict(future_inputs.reshape(-1, input_seq_len, 1))
    future_predictions = scaler_out.inverse_transform(future_predictions)[0]

    print("MSE: ", mse)
    print("Future predictions: ", future_predictions)

    # Time axes for the observed data and for the forecast that follows it.
    time_axis_data = np.arange(len(prices))
    time_axis_future = np.arange(len(prices), len(prices) + len(future_predictions))
    time_axis = np.concatenate((time_axis_data, time_axis_future))
    data_and_predictions = np.concatenate((prices, future_predictions))

    fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]})
    try:
        # First subplot: the series plus forecast, forecast overdrawn dashed.
        ax[0].plot(time_axis, data_and_predictions, label='Data and Future Predictions')
        ax[0].plot(time_axis_future, future_predictions, linestyle='dashed', label='Future Predictions')
        ax[0].set_xlabel('Time')
        ax[0].set_ylabel('Price')
        ax[0].legend()

        # Second subplot: table of the predicted values.
        ax[1].axis('off')
        table_data = [[f"Day {i+1}", "{:.2f}".format(val)]
                      for i, val in enumerate(future_predictions)]
        ax[1].table(cellText=table_data, colLabels=['Day', 'Prediction'], loc='center')

        fig.savefig(os.path.join(dataset, 'lstmmodels.png'))
    finally:
        # pyplot keeps figures alive until closed; release it so repeated
        # calls do not accumulate open figures.
        plt.close(fig)