淘先锋技术网

首页 1 2 3 4 5 6 7

Multi-Step LSTM预测(1)




数据集



数据准备与模型评估


1、拆分成训练和测试数据。
训练数据=前两年的洗发水(shampoo)销售数据
测试数据=剩下一年的洗发水(shampoo)销售数据

2、Multi-Step 预测
假设需要预测3个月的销售数据

3、模型评估
用 rolling-forecast(walk-forward,滚动预测)方式进行模型验证:
对测试数据的每个时间步做一次预测;然后把测试数据中下一个真实观测值提供给模型,窗口向前滑动一步,再做下一次预测

用RMSE评估

持久模型(Persistence Model)


它是很好的时间序列预测基准(baseline)
是最简单的预测

原理:
用当前值作为之后的预测值

静态模型完整过程


1、转换成有监督数据
2、切分成训练和测试数据
3、训练模型
4、预测
5、评估
6、输出

静态模型预测代码解析


#coding=utf-8                                                                                                          
from pandas import read_csv      
from pandas import DataFrame     
from pandas import concat
from sklearn.metrics import mean_squared_error
from math import sqrt
from matplotlib import pyplot    
from pandas import datetime      
 
def parser(x):
    """Parse a ``YYYY/MM/DD`` date string with ``datetime.strptime``."""
    date_format = '%Y/%m/%d'
    return datetime.strptime(x, date_format)
 
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table.

    Every output row holds ``n_in`` lagged observations followed by the
    current observation and ``n_out - 1`` future observations.

    Args:
        data: Observations in any form ``DataFrame`` accepts (flat list,
            list of rows, 1-D or 2-D array).
        n_in: Number of lag steps used as input columns (t-n_in .. t-1).
        n_out: Number of steps used as output columns (t .. t+n_out-1).
        dropnan: When true, drop every row that contains a NaN (shifting
            introduces NaNs at both ends of the table).

    Returns:
        DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``.
    """
    # Wrapping in a DataFrame adds row and column labels.
    df = DataFrame(data)
    # Count the variables on the frame itself rather than guessing from the
    # input type: a list of multi-variable rows has more than one column, and
    # a 1-D array has no ``shape[1]`` at all.
    n_vars = df.shape[1]
    cols, names = [], []
    # Input sequence: (t-n_in, ..., t-1).
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # Forecast sequence: (t, t+1, ..., t+n_out-1).
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    agg = concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg
	
#Split the supervised data into training and test sets
def prepare_data(series, n_test, n_lay, n_seq):
    """Convert a series to supervised rows and split off the last rows.

    Args:
        series: Object exposing ``.values`` (the raw observations).
        n_test: Number of trailing supervised rows used as the test set.
        n_lay: Number of lag steps, forwarded to ``series_to_supervised``.
        n_seq: Number of forecast steps, forwarded to
            ``series_to_supervised``.

    Returns:
        ``(train, test)`` arrays of supervised rows.
    """
    observations = series.values
    # Reshape to a single column so the values form an (n, 1) table.
    column = observations.reshape(len(observations), 1)

    framed = series_to_supervised(column, n_lay, n_seq).values

    return framed[0:-n_test], framed[-n_test:]
 
#Persistence model forecast
#Repeat the last observation as the forecast for each of the next n_seq steps
def persistence(last_ob, n_seq):
    """Return a list containing ``last_ob`` repeated ``n_seq`` times."""
    repeated = []
    for _ in range(n_seq):
        repeated.append(last_ob)
    return repeated
 
#Produce persistence-model forecasts for every test row
def make_forcast(train, test, n_lay, n_seq):
    """Forecast ``n_seq`` steps for each row of ``test`` with persistence.

    Args:
        train: Training rows; not used by the persistence model and kept
            only so the signature stays unchanged.
        test: Iterable of supervised rows whose first ``n_lay`` entries are
            the lagged inputs.
        n_lay: Number of lag (input) columns at the start of each row.
        n_seq: Number of steps to forecast.

    Returns:
        List with one forecast (a list of ``n_seq`` values) per test row.
    """
    forcasts = []
    for row in test:
        # Slice with the ``n_lay`` parameter (the original read a module
        # global instead); the last input is the most recent observation.
        inputs = row[0:n_lay]
        forcasts.append(persistence(inputs[-1], n_seq))
    return forcasts
 
#Evaluate the forecasts
def evaluate_forcasts(test, forcasts, n_lag, n_seq):
    """Print the predictions and the RMSE for every forecast horizon.

    Args:
        test: 2-D array of supervised rows; column ``n_lag + i`` holds the
            actual value for horizon ``i``.
        forcasts: Sequence of forecasts, one per test row, each indexable by
            horizon.
        n_lag: Number of lag (input) columns preceding the actual values.
        n_seq: Number of forecast horizons to evaluate.
    """
    # The whole body sits inside the loop so that every horizon
    # t+1 .. t+n_seq gets its own RMSE, not just the last one.
    for i in range(n_seq):
        actual = test[:, (n_lag + i)]
        predicted = [forcast[i] for forcast in forcasts]
        print('predicted')
        print(predicted)
        rmse = sqrt(mean_squared_error(actual, predicted))
        # RMSE for each forecast horizon 1..n_seq.
        print('t+%d RMSE:%f' % ((i + 1), rmse))
 
def plot_forcasts(series, forcasts, n_test):
    """Plot the raw series and overlay each forecast as a red segment.

    Each segment starts at the observation preceding the forecast so the
    red line connects to the original curve. The x/y values of every
    segment are printed as well.
    """
    # Original data.
    pyplot.plot(series.values)
    # Forecasts.
    base = len(series) - n_test - 1
    for idx, forecast in enumerate(forcasts):
        off_s = base + idx
        off_e = off_s + len(forecast) + 1
        xaxis = list(range(off_s, off_e))
        anchor = series.values[off_s]
        yaxis = [anchor] + forecast
        print('xaxis')
        print(xaxis)
        print('yaxis')
        print(yaxis)
        print('series.values[off_s]')
        print(series.values[off_s])
        pyplot.plot(xaxis, yaxis, color='red')
    pyplot.show()
 
# Load the sales series; the first CSV column is parsed as dates with
# ``parser`` and used as the index.
series = read_csv(
    'data_set/shampoo-sales.csv',
    header=0,
    parse_dates=[0],
    index_col=0,
    squeeze=True,
    date_parser=parser,
)

# One lagged input step, forecasting three steps ahead.
n_lag = 1
n_seq = 3
# With the last 12 months held out and 3 months forecast each time,
# 10 three-month forecasts can be made.
n_test = 10
train, test = prepare_data(series, n_test, n_lag, n_seq)
print('train data')
print(train)
print('test data')
print(test)
forecasts = make_forcast(train, test, n_lag, n_seq)
print('forecasts')
print(forecasts)
evaluate_forcasts(test, forecasts, n_lag, n_seq)
# n_test + 2 == 12: plot offsets are measured from the 12 held-out months.
plot_forcasts(series, forecasts, n_test+2)