Multi-Step LSTM预测(1)
数据集
数据准备与模型评估
1、拆分成训练和测试数据。
训练数据=前两年洗发水销售数据
测试数据=剩下一年的洗发水销售数据
2、Multi-Step 预测
假设需要预测3个月的销售数据
3、模型评估
用rolling-forecast(walk-forward)方式进行模型验证
对测试数据的每个时间步依次滑动:先做一次预测,再把该时间步的真实观测值提供给模型,用于下一个时间步的预测
用RMSE评估
持久模型(Persistence Model)
它是很好的时间序列预测的基准
是最简单的预测
原理:
用当前值作为之后的预测值
静态模型完整过程
1、转换成有监督数据
参考 Python时间序列LSTM预测系列教程(8)-多变量中的方法
2、切分成训练和测试数据
3、训练模型
4、预测
5、评估
6、输出
静态模型预测代码解析
#coding=utf-8
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.metrics import mean_squared_error
from math import sqrt
from matplotlib import pyplot
from pandas import datetime
def parser(x):
    """Parse a date string formatted as ``YYYY/MM/DD`` into a datetime.

    Args:
        x: Date text such as ``'2001/01/15'``.

    Returns:
        The ``datetime.datetime`` produced by ``strptime`` for that text.

    Raises:
        ValueError: If ``x`` does not match the ``%Y/%m/%d`` format.
    """
    # Imported locally so this function does not depend on the
    # ``pandas.datetime`` alias, which was deprecated and later removed from
    # pandas; that alias was simply the standard-library class.
    from datetime import datetime

    return datetime.strptime(x, '%Y/%m/%d')
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table.

    Each output row holds ``n_in`` lagged observations followed by ``n_out``
    observations starting at time ``t``. Columns are named
    ``var<j>(t-<i>)`` for the lags, ``var<j>(t)`` for the current step and
    ``var<j>(t+<i>)`` for the later steps.

    Args:
        data: Observations accepted by ``DataFrame`` (flat list, list of
            rows, 1-D or 2-D array, Series).
        n_in: Number of lagged steps used as input columns.
        n_out: Number of steps, starting at ``t``, used as output columns.
        dropnan: When true, drop the rows made incomplete by shifting.

    Returns:
        A ``DataFrame`` with ``(n_in + n_out) * n_vars`` columns.
    """
    df = DataFrame(data)
    # Read the variable count from the frame itself rather than guessing it
    # from the input type: this stays correct for a list of rows and for
    # 1-D arrays/Series, which have no ``shape[1]``.
    n_vars = df.shape[1]
    cols, names = [], []
    # Input sequence: t-n_in, ..., t-1.
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # Forecast sequence: t, t+1, ..., t+n_out-1.
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
    agg = concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        # Shifting leaves NaN at the head/tail; those rows are incomplete.
        agg.dropna(inplace=True)
    return agg
# Split the series into training and test data
def prepare_data(series, n_test, n_lay, n_seq):
    """Frame ``series`` as supervised rows and split off the last rows.

    Args:
        series: Object whose ``.values`` is a 1-D array of observations.
        n_test: Number of trailing supervised rows kept as the test set.
        n_lay: Number of lag steps, passed to ``series_to_supervised`` as
            its input length.
        n_seq: Number of forecast steps, passed as its output length.

    Returns:
        ``(train, test)`` arrays: every row before the last ``n_test`` rows,
        and the last ``n_test`` rows.
    """
    observations = series.values
    # series_to_supervised is given a single-column 2-D array.
    column = observations.reshape(len(observations), 1)
    framed = series_to_supervised(column, n_lay, n_seq).values
    return framed[0:-n_test], framed[-n_test:]
# Persistence-model forecast
def persistence(last_ob, n_seq):
    """Repeat the last observation as the forecast for each future step.

    Args:
        last_ob: Most recent observed value.
        n_seq: Number of steps to forecast.

    Returns:
        A list of length ``n_seq`` whose every element is ``last_ob``.
    """
    return [last_ob] * n_seq
# Produce persistence-model forecasts for every test row
def make_forcast(train, test, n_lay, n_seq):
    """Forecast ``n_seq`` steps for each row of ``test`` by persistence.

    Args:
        train: Training rows; not used by the persistence model and kept
            only so the signature stays unchanged.
        test: Supervised rows whose first ``n_lay`` columns are the lag
            inputs.
        n_lay: Number of lag-input columns at the start of each row.
        n_seq: Number of steps to forecast per row.

    Returns:
        A list with one forecast (a list of ``n_seq`` values) per test row.
    """
    forcasts = []
    for row in test:
        # Slice with the n_lay parameter; the previous body read a name
        # ``n_lag`` that is not a parameter of this function and therefore
        # only resolved through a module-level global.
        inputs = row[0:n_lay]
        # The most recent lag value is the last input column.
        forcasts.append(persistence(inputs[-1], n_seq))
    return forcasts
# Evaluate the forecasts
def evaluate_forcasts(test, forcasts, n_lag, n_seq):
    """Print the RMSE of the forecasts separately for each forecast step.

    Args:
        test: 2-D array of supervised rows; column ``n_lag + i`` holds the
            actual value for forecast step ``i``.
        forcasts: Sequence of forecasts, each indexable by step.
        n_lag: Number of lag-input columns preceding the actual values.
        n_seq: Number of forecast steps to evaluate.
    """
    for i in range(n_seq):
        actual = test[:, n_lag + i]
        predicted = [forcast[i] for forcast in forcasts]
        # Single-argument print(...) emits the same text under Python 2 and
        # Python 3, unlike the print statement.
        print('predicted')
        print(predicted)
        rmse = sqrt(mean_squared_error(actual, predicted))
        # One RMSE per horizon, labelled t+1 .. t+n_seq.
        print('t+%d RMSE:%f' % ((i + 1), rmse))
def plot_forcasts(series, forcasts, n_test):
    """Plot the series and overlay each multi-step forecast in red.

    Args:
        series: Object supporting ``len()`` and exposing ``.values``.
        forcasts: Sequence of forecasts, each a sequence of values.
        n_test: Offset from the end of ``series`` used to place the first
            forecast: forecast ``i`` is anchored at index
            ``len(series) - n_test + i - 1``.
    """
    # Original data.
    pyplot.plot(series.values)
    # Forecast data.
    for i, forcast in enumerate(forcasts):
        off_s = len(series) - n_test + i - 1
        off_e = off_s + len(forcast) + 1
        xaxis = list(range(off_s, off_e))
        # Start each red segment at the observed value so it connects to
        # the series line; list() keeps this a concatenation even when a
        # forecast is not a plain list.
        yaxis = [series.values[off_s]] + list(forcast)
        # Single-argument print(...) emits the same text under Python 2 and
        # Python 3, unlike the print statement.
        print('xaxis')
        print(xaxis)
        print('yaxis')
        print(yaxis)
        print('series.values[off_s]')
        print(series.values[off_s])
        pyplot.plot(xaxis, yaxis, color='red')
    pyplot.show()
# Load the sales series, parsing the first column as the date index.
# (The call was previously split in the middle of the ``parser`` name.)
series = read_csv('data_set/shampoo-sales.csv', header=0, parse_dates=[0],
                  index_col=0, squeeze=True, date_parser=parser)
# One lag step as input, three steps forecast
n_lag = 1
n_seq = 3
# With the last 12 months held out and a 3-month horizon, 10 forecasts can
# be made, i.e. ten 3-month windows
n_test = 10
train, test = prepare_data(series, n_test, n_lag, n_seq)
print('train data')
print(train)
print('test data')
print(test)
forecasts = make_forcast(train, test, n_lag, n_seq)
print('forecasts')
print(forecasts)
evaluate_forcasts(test, forecasts, n_lag, n_seq)
# The n_test forecast windows span the last n_test + n_seq - 1 observations
# (12 here), which is the offset plot_forcasts needs to anchor them.
plot_forcasts(series, forecasts, n_test + n_seq - 1)