LSTM循环神经网络_预测股价-CSDN博客

import os
import json
import time
import math
import matplotlib.pyplot as plt
import numpy as np
from keras.utils import plot_model
import pandas as pd
import warnings
from keras.models import Sequential,load_model
import datetime as dt
from keras.layers import Dense,Activation,Dropout,LSTM
from keras.utils import plot_model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from numpy import newaxis
warnings.filterwarnings("ignore")
%matplotlib inline

1.数据处理类

'''
从配置文件加载数据
'''
class DataLoader():
    """Load feature columns from a CSV file and cut them into sliding windows.

    The rows are split by position into a leading training part and a
    trailing test part; both parts are kept as NumPy arrays.

    Args:
        file_name: Path (or file-like object) handed to ``pandas.read_csv``.
        split_rate: Fraction of the rows, counted from the top, that goes
            into the training set; the remaining rows form the test set.
        feature_cols: List of column names to keep. The first column is the
            one the windows use as prediction target.
    """

    def __init__(self, file_name, split_rate, feature_cols):
        dataframe = pd.read_csv(file_name)
        count_split_train = int(len(dataframe) * split_rate)
        # Select the feature columns once and slice that single array twice.
        feature_values = dataframe.get(feature_cols).values
        self.data_train = feature_values[:count_split_train]
        self.data_test = feature_values[count_split_train:]
        self.len_train = len(self.data_train)
        self.len_test = len(self.data_test)
        self.len_train_windows = None

    def get_train_data(self, seq_len, normalise):
        """Build every training window.

        Args:
            seq_len: Number of rows per window (inputs plus the target row).
            normalise: When true, each window is normalised on its own with
                ``normalise_windows``.

        Returns:
            Tuple ``(x, y)`` of arrays: ``x`` stacks the first
            ``seq_len - 1`` rows of each window, ``y`` stacks the first
            column of each window's last row.
        """
        data_x = []
        data_y = []
        for i in range(self.len_train - seq_len):
            x, y = self._next_window(i, seq_len, normalise)
            data_x.append(x)
            data_y.append(y)
        return np.array(data_x), np.array(data_y)

    def _next_window(self, i, seq_len, normalise):
        """Return the ``(x, y)`` pair of the training window starting at row ``i``."""
        window = self.data_train[i: i + seq_len]
        if normalise:
            window = self.normalise_windows(window, single_window=True)[0]
        x = window[:-1]
        # The list index keeps y one-dimensional with a single element.
        y = window[-1, [0]]
        return x, y

    def normalise_windows(self, window_data, single_window=False):
        """Express every value as its relative change against the window start.

        Each column of each window is transformed to ``p / p0 - 1`` where
        ``p0`` is that column's value in the first row of the same window.

        Args:
            window_data: One 2-D window (``single_window=True``) or a
                sequence/array of equally shaped 2-D windows.
            single_window: Whether ``window_data`` is a single window.

        Returns:
            Float array of shape ``(n_windows, rows, columns)``.

        Raises:
            ZeroDivisionError: If the first row of any window contains 0.
        """
        windows = np.asarray(window_data, dtype=float)
        if single_window:
            windows = windows[np.newaxis]
        if windows.size == 0:
            # Nothing to normalise; also avoids indexing an empty 1-D array.
            return windows
        # First row of every window, kept 3-D so that it broadcasts over rows.
        base = windows[:, :1, :]
        if np.any(base == 0):
            raise ZeroDivisionError(
                "cannot normalise a window whose first row contains 0")
        return windows / base - 1

    def get_test_data(self, seq_len, normalise):
        """Build every test window.

        Args:
            seq_len: Number of rows per window (inputs plus the target row).
            normalise: When true, the windows are normalised with
                ``normalise_windows``.

        Returns:
            Tuple ``(x, y)``: ``x`` holds the first ``seq_len - 1`` rows of
            each window, ``y`` the first column of each window's last row.
        """
        data_windows = [self.data_test[i: i + seq_len]
                        for i in range(self.len_test - seq_len)]
        data_windows = np.array(data_windows).astype(float)
        if normalise:
            data_windows = self.normalise_windows(data_windows, single_window=False)
        x = data_windows[:, :-1]
        y = data_windows[:, -1, [0]]
        return x, y

'''
计时器
'''
class Timer():
    """Minimal stopwatch: call start(), then stop() to print the elapsed time."""

    def __init__(self):
        # Stays None until start() has been called.
        self.start_time = None

    def start(self):
        """Record the current time as the start of the measurement."""
        self.start_time = dt.datetime.now()

    def stop(self):
        """Print the time elapsed since start() as ``Time taken: <timedelta>``."""
        elapsed = dt.datetime.now() - self.start_time
        print('Time taken: %s'%(elapsed))

2.模型类

'''
LSTM模型
'''
class LSTMModel():
    """Wrapper around a Keras ``Sequential`` model: build, train and predict."""

    def __init__(self):
        self.model = Sequential()

    def build_model(self, model_config):
        """Add the layers described in the config and compile the model.

        Args:
            model_config: Dict whose ``'model'`` entry holds ``'layers'`` (a
                list of layer dicts), ``'loss'`` and ``'optimizer'``. Every
                layer dict has a ``'type'`` of ``'dense'``, ``'lstm'`` or
                ``'dropout'``; the optional keys ``'neurons'``, ``'rate'``,
                ``'activation'``, ``'return_seq'``, ``'input_timesteps'`` and
                ``'input_dim'`` are read when present. Layers of any other
                type are skipped.

        Returns:
            The compiled model (the same object as ``self.model``).
        """
        timer = Timer()
        timer.start()

        for layer in model_config['model']['layers']:
            # Settings missing from the config are passed on as None.
            neurons = layer.get('neurons')
            dropout_rate = layer.get('rate')
            activation = layer.get('activation')
            return_seq = layer.get('return_seq')
            input_timesteps = layer.get('input_timesteps')
            input_dim = layer.get('input_dim')

            layer_type = layer['type']
            if layer_type == 'dense':
                self.model.add(Dense(neurons, activation=activation))
            elif layer_type == 'lstm':
                self.model.add(LSTM(neurons, input_shape=(input_timesteps, input_dim),
                                    return_sequences=return_seq))
            elif layer_type == 'dropout':
                self.model.add(Dropout(dropout_rate))

        self.model.compile(loss=model_config['model']['loss'],
                           optimizer=model_config['model']['optimizer'])
        print('model compiled')
        timer.stop()
        return self.model

    def train(self, x, y, epochs, batch_size, save_dir):
        """Fit the model on in-memory data and save it under ``save_dir``.

        Args:
            x: Training inputs passed to ``fit``.
            y: Training targets passed to ``fit``.
            epochs: Maximum number of epochs.
            batch_size: Batch size passed to ``fit``.
            save_dir: Directory in which the model file is written; the file
                name is built from the current time and ``epochs``.
        """
        timer = Timer()
        timer.start()
        print("model train started epochs %s batch_size %s "%(epochs, batch_size))
        save_file_name = os.path.join(save_dir, "%s-e%s.h5.keras"%(dt.datetime.now().strftime("%d%m%Y-%H%M%S"), str(epochs)))
        # fit() below gets no validation data, so a 'val_loss' metric is never
        # produced; monitor the training loss so that both callbacks can act.
        callbacks = [
            EarlyStopping(monitor='loss', patience=2),
            ModelCheckpoint(filepath=save_file_name, monitor="loss", save_best_only=True)
        ]
        self.model.fit(x, y, epochs=epochs,
                       batch_size=batch_size, callbacks=callbacks)
        # NOTE: this writes the final model to the same path the checkpoint
        # callback uses, so the file always ends up holding the last state.
        self.model.save(save_file_name)
        print("model train completed. model save as ", save_file_name)
        timer.stop()

    def predict_sequences(self, data, window_size, predict_len, debug = False):
        """Predict several multi-step sequences, feeding predictions back in.

        Every ``predict_len``-th window of ``data`` seeds one sequence. For
        each of the ``predict_len`` steps the model predicts from the current
        frame, then the frame drops its oldest row and receives the
        prediction as a new row.

        Args:
            data: Array of input windows, indexed as ``data[window]``.
            window_size: Window length used to place the new row; the
                prediction is inserted at row index ``window_size - 2``.
            predict_len: Number of steps per predicted sequence.
            debug: Print intermediate frames and results when true.

        Returns:
            List of sequences, each a list of ``predict_len`` predictions.
        """
        print(" predict sequence multiple...")
        predict_seqs = []
        for i in range(len(data) // predict_len):
            if debug:
                print("predict data shape ", data.shape)
            cur_frame = data[i * predict_len]
            if debug:
                print("cur_frame ", cur_frame)
            predicted = []
            for _ in range(predict_len):
                predict_result = self.model.predict(cur_frame[newaxis, :, :])
                if debug:
                    print("predict_result ", predict_result)
                predicted.append(predict_result[0, 0])
                cur_frame = cur_frame[1:]
                if debug:
                    print("cur_frame ", cur_frame)
                # The scalar prediction is broadcast over all feature columns
                # of the inserted row.
                cur_frame = np.insert(cur_frame, [window_size - 2], predicted[-1], axis=0)
                if debug:
                    print("cur_frame ", cur_frame)
            predict_seqs.append(predicted)
        return predict_seqs

    def predict_point(self, data, debug = False):
        """Predict one value per input window and return them as a flat array.

        Args:
            data: Model inputs passed to ``predict`` unchanged.
            debug: Print the input and output shapes when true.

        Returns:
            1-D array containing every element of the model output.
        """
        print("predict point start")
        if debug:
            print("predict data shape ", np.array(data).shape)
        predicted = self.model.predict(data)
        if debug:
            print("predited data shape ", np.array(predicted).shape)
        predicted = np.reshape(predicted, (predicted.size, ))
        if debug:
            print("predited data shape ", np.array(predicted).shape)
        print("predict point completed")
        return predicted

3.画图展示

def plot_point_result(predicted_data, true_data):
    """Plot predictions against the true series, save the figure and show it.

    Args:
        predicted_data: Predicted values, one per time step.
        true_data: True values to compare against.

    The image is written to ``LSTMModel_stock_price_predict_point_result.png``
    in the current working directory.
    """
    fig = plt.figure(facecolor='white')
    sub_plot = fig.add_subplot(111)
    sub_plot.plot(true_data, label="True Data")
    sub_plot.plot(predicted_data, label="Predict Data")
    sub_plot.legend()
    # Save before show(): after show() returns the figure may already have
    # been released, in which case savefig() writes an empty image.
    fig.savefig("LSTMModel_stock_price_predict_point_result.png")
    plt.show()

def plot_sequences_result(predicted_data, true_data, predict_len):
    """Plot multi-step predicted sequences over the true series, save and show.

    Args:
        predicted_data: List of predicted sequences (each a list of values).
        true_data: True values to compare against.
        predict_len: Horizontal offset between consecutive sequences;
            sequence ``i`` is drawn starting at position ``i * predict_len``.

    The image is written to
    ``LSTMModel_stock_price_predict_sequences_result.png`` in the current
    working directory.
    """
    fig = plt.figure(facecolor="white")
    sub_plot = fig.add_subplot(111)
    sub_plot.plot(true_data, label="True Data")
    for i, data in enumerate(predicted_data):
        # Leading None entries shift the sequence to where it starts.
        padding = [None] * (i * predict_len)
        # Label only the first sequence so the legend gets a single entry.
        sub_plot.plot(padding + data, label="Predict Data" if i == 0 else None)
    # Build the legend after all lines exist so the predictions are listed.
    sub_plot.legend()
    # Save before show(): after show() returns the figure may already have
    # been released, in which case savefig() writes an empty image.
    fig.savefig("LSTMModel_stock_price_predict_sequences_result.png")
    plt.show()

4.main方法

# Load the run configuration; the context manager closes the file handle.
with open("rnn_stock_predict_config.json", 'r', encoding="utf-8") as config_file:
    model_config = json.load(config_file)
save_dir = model_config['model']['save_dir']
# Create the output directory if it is missing.
os.makedirs(save_dir, exist_ok=True)
# Window length shared by data preparation and sequence prediction.
sequence_length = model_config['data']['sequence_length']

# Read the data.
data_loader = DataLoader(os.path.join('data', model_config['data']['filename']),
                         model_config['data']['train_test_split'],
                         model_config['data']['columns'])
# Create the RNN model and write a diagram of its layers.
lstm_model = LSTMModel()
builded_lstm_model = lstm_model.build_model(model_config)
plot_model(builded_lstm_model, to_file="LSTMModel_stock_price_predict.png", show_shapes=True)

# Load the training data.
x, y = data_loader.get_train_data(seq_len=sequence_length,
                                  normalise=model_config['data']["normalise"])
print("train data x shape: ", x.shape)
print("train data y shape: ", y.shape)

# Train the model.
lstm_model.train(x, y, epochs=model_config['training']['epochs'],
                 batch_size=model_config['training']['batch_size'],
                 save_dir=save_dir)

# Get the test data.
x_test, y_test = data_loader.get_test_data(seq_len=sequence_length,
                                           normalise=model_config['data']['normalise'])

# Test: multi-step sequences first, then point-by-point predictions.
# The window length is passed both as window size and as sequence length.
predict_seqs = lstm_model.predict_sequences(x_test, sequence_length, sequence_length)
predict_point = lstm_model.predict_point(x_test, debug=True)

model compiled
Time taken: 0:00:00.063992
train data x shape:  (3942, 49, 2)
train data y shape:  (3942, 1)
model train started epochs 1 batch_size 32 
124/124 ━━━━━━━━━━━━━━━━━━━━ 7s 35ms/step - loss: 0.0019
model train completed. model save as  saved_models\20092024-153703-e1.h5.keras
Time taken: 0:00:07.437250
 predict sequence multiple...
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 305ms/step

predict point start
predict data shape  (655, 49, 2)
21/21 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step
predited data shape  (655, 1)
predited data shape  (655,)
predict point completed

# Show the test results.
# Next-day prediction.
plot_point_result(predicted_data=predict_point, true_data=y_test)
# 50-day prediction.
plot_sequences_result(
    predicted_data=predict_seqs,
    true_data=y_test,
    predict_len=model_config['data']['sequence_length'],
)

在这里插入图片描述

<Figure size 640x480 with 0 Axes>

在这里插入图片描述

<Figure size 640x480 with 0 Axes>