In [1]:
import numpy
import matplotlib.pyplot as plt
import pandas
import math
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
numpy.random.seed(7)
In [2]:
dataset = pandas.read_csv('airline-passengers.csv', usecols=[1])
In [3]:
dataset.head()
Out[3]:
Passengers
0 112
1 118
2 132
3 129
4 121
In [4]:
## converting dataframe into numpy array
dataset = dataset.values
In [5]:
## changing the datatype
dataset = dataset.astype('float32')
In [6]:
## Scaling

scaler = MinMaxScaler()
dataset = scaler.fit_transform(dataset)
In [7]:
dataset
Out[7]:
array([[0.01544401],
       [0.02702703],
       [0.05405405],
       [0.04826255],
       [0.03281853],
       [0.05984557],
       [0.08494207],
       [0.08494207],
       [0.06177607],
       [0.02895753],
       [0.        ],
       [0.02702703],
       [0.02123553],
       [0.04247104],
       [0.07142857],
       [0.05984557],
       [0.04054055],
       [0.08687258],
       [0.12741312],
       [0.12741312],
       [0.10424709],
       [0.05598456],
       [0.01930502],
       [0.06949806],
       [0.07915059],
       [0.08880308],
       [0.14285713],
       [0.11389962],
       [0.13127413],
       [0.14285713],
       [0.18339768],
       [0.18339768],
       [0.15444016],
       [0.11196911],
       [0.08108109],
       [0.1196911 ],
       [0.12934363],
       [0.14671814],
       [0.17181468],
       [0.14864865],
       [0.15250966],
       [0.22007722],
       [0.24324325],
       [0.26640925],
       [0.2027027 ],
       [0.16795367],
       [0.13127413],
       [0.17374519],
       [0.17760617],
       [0.17760617],
       [0.25482625],
       [0.25289574],
       [0.24131274],
       [0.26833975],
       [0.3088803 ],
       [0.32432434],
       [0.25675675],
       [0.20656371],
       [0.14671814],
       [0.18725869],
       [0.19305018],
       [0.16216215],
       [0.25289574],
       [0.23745173],
       [0.25096524],
       [0.3088803 ],
       [0.38223937],
       [0.36486486],
       [0.2992278 ],
       [0.24131274],
       [0.1911197 ],
       [0.24131274],
       [0.26640925],
       [0.24903473],
       [0.31467178],
       [0.3185328 ],
       [0.32046333],
       [0.4073359 ],
       [0.5019305 ],
       [0.46911195],
       [0.40154442],
       [0.32818535],
       [0.25675675],
       [0.3359073 ],
       [0.34749034],
       [0.33397684],
       [0.41119692],
       [0.4034749 ],
       [0.4131274 ],
       [0.52123547],
       [0.5965251 ],
       [0.58108103],
       [0.484556  ],
       [0.3899614 ],
       [0.3223938 ],
       [0.3899614 ],
       [0.4073359 ],
       [0.3803089 ],
       [0.48648646],
       [0.47104248],
       [0.484556  ],
       [0.6138996 ],
       [0.6969112 ],
       [0.70077217],
       [0.57915056],
       [0.46911195],
       [0.38803086],
       [0.44787642],
       [0.45559844],
       [0.4131274 ],
       [0.4980695 ],
       [0.47104248],
       [0.49999997],
       [0.6389961 ],
       [0.7471043 ],
       [0.7741313 ],
       [0.57915056],
       [0.492278  ],
       [0.3976834 ],
       [0.44980696],
       [0.49420848],
       [0.45945945],
       [0.5830116 ],
       [0.5637065 ],
       [0.61003864],
       [0.71042466],
       [0.8571429 ],
       [0.8783784 ],
       [0.69305015],
       [0.5849421 ],
       [0.4980695 ],
       [0.58108103],
       [0.6042471 ],
       [0.554054  ],
       [0.60810804],
       [0.6891892 ],
       [0.71042466],
       [0.8320464 ],
       [1.        ],
       [0.96911204],
       [0.7799227 ],
       [0.6891892 ],
       [0.55212355],
       [0.6332046 ]], dtype=float32)
In [8]:
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
print(len(train), len(test))
96 48
In [9]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)
In [10]:
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
In [11]:
## model
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
In [12]:
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)
Epoch 1/100
2023-01-11 21:24:45.329097: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
94/94 - 1s - loss: 0.0329 - 587ms/epoch - 6ms/step
Epoch 2/100
94/94 - 0s - loss: 0.0150 - 42ms/epoch - 450us/step
Epoch 3/100
94/94 - 0s - loss: 0.0119 - 42ms/epoch - 448us/step
Epoch 4/100
94/94 - 0s - loss: 0.0108 - 41ms/epoch - 434us/step
Epoch 5/100
94/94 - 0s - loss: 0.0100 - 43ms/epoch - 456us/step
Epoch 6/100
94/94 - 0s - loss: 0.0090 - 41ms/epoch - 438us/step
Epoch 7/100
94/94 - 0s - loss: 0.0081 - 42ms/epoch - 447us/step
Epoch 8/100
94/94 - 0s - loss: 0.0072 - 43ms/epoch - 460us/step
Epoch 9/100
94/94 - 0s - loss: 0.0064 - 42ms/epoch - 452us/step
Epoch 10/100
94/94 - 0s - loss: 0.0056 - 42ms/epoch - 447us/step
Epoch 11/100
94/94 - 0s - loss: 0.0049 - 45ms/epoch - 479us/step
Epoch 12/100
94/94 - 0s - loss: 0.0044 - 42ms/epoch - 446us/step
Epoch 13/100
94/94 - 0s - loss: 0.0037 - 42ms/epoch - 447us/step
Epoch 14/100
94/94 - 0s - loss: 0.0033 - 42ms/epoch - 449us/step
Epoch 15/100
94/94 - 0s - loss: 0.0029 - 42ms/epoch - 450us/step
Epoch 16/100
94/94 - 0s - loss: 0.0026 - 44ms/epoch - 467us/step
Epoch 17/100
94/94 - 0s - loss: 0.0025 - 42ms/epoch - 450us/step
Epoch 18/100
94/94 - 0s - loss: 0.0023 - 42ms/epoch - 449us/step
Epoch 19/100
94/94 - 0s - loss: 0.0022 - 42ms/epoch - 449us/step
Epoch 20/100
94/94 - 0s - loss: 0.0021 - 42ms/epoch - 449us/step
Epoch 21/100
94/94 - 0s - loss: 0.0020 - 43ms/epoch - 453us/step
Epoch 22/100
94/94 - 0s - loss: 0.0021 - 42ms/epoch - 449us/step
Epoch 23/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 452us/step
Epoch 24/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 441us/step
Epoch 25/100
94/94 - 0s - loss: 0.0021 - 42ms/epoch - 449us/step
Epoch 26/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 438us/step
Epoch 27/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 436us/step
Epoch 28/100
94/94 - 0s - loss: 0.0020 - 73ms/epoch - 771us/step
Epoch 29/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 439us/step
Epoch 30/100
94/94 - 0s - loss: 0.0021 - 43ms/epoch - 453us/step
Epoch 31/100
94/94 - 0s - loss: 0.0020 - 43ms/epoch - 461us/step
Epoch 32/100
94/94 - 0s - loss: 0.0021 - 42ms/epoch - 448us/step
Epoch 33/100
94/94 - 0s - loss: 0.0020 - 43ms/epoch - 459us/step
Epoch 34/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 450us/step
Epoch 35/100
94/94 - 0s - loss: 0.0021 - 42ms/epoch - 451us/step
Epoch 36/100
94/94 - 0s - loss: 0.0020 - 43ms/epoch - 454us/step
Epoch 37/100
94/94 - 0s - loss: 0.0020 - 43ms/epoch - 461us/step
Epoch 38/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 445us/step
Epoch 39/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 448us/step
Epoch 40/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 449us/step
Epoch 41/100
94/94 - 0s - loss: 0.0020 - 43ms/epoch - 455us/step
Epoch 42/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 449us/step
Epoch 43/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 432us/step
Epoch 44/100
94/94 - 0s - loss: 0.0021 - 42ms/epoch - 451us/step
Epoch 45/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 442us/step
Epoch 46/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 437us/step
Epoch 47/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 431us/step
Epoch 48/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 435us/step
Epoch 49/100
94/94 - 0s - loss: 0.0021 - 41ms/epoch - 441us/step
Epoch 50/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 444us/step
Epoch 51/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 450us/step
Epoch 52/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 448us/step
Epoch 53/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 439us/step
Epoch 54/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 435us/step
Epoch 55/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 439us/step
Epoch 56/100
94/94 - 0s - loss: 0.0021 - 41ms/epoch - 438us/step
Epoch 57/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 439us/step
Epoch 58/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 439us/step
Epoch 59/100
94/94 - 0s - loss: 0.0020 - 40ms/epoch - 429us/step
Epoch 60/100
94/94 - 0s - loss: 0.0020 - 40ms/epoch - 424us/step
Epoch 61/100
94/94 - 0s - loss: 0.0019 - 41ms/epoch - 438us/step
Epoch 62/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 437us/step
Epoch 63/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 434us/step
Epoch 64/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 436us/step
Epoch 65/100
94/94 - 0s - loss: 0.0021 - 41ms/epoch - 438us/step
Epoch 66/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 435us/step
Epoch 67/100
94/94 - 0s - loss: 0.0021 - 41ms/epoch - 439us/step
Epoch 68/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 438us/step
Epoch 69/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 433us/step
Epoch 70/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 438us/step
Epoch 71/100
94/94 - 0s - loss: 0.0021 - 41ms/epoch - 432us/step
Epoch 72/100
94/94 - 0s - loss: 0.0019 - 40ms/epoch - 430us/step
Epoch 73/100
94/94 - 0s - loss: 0.0021 - 41ms/epoch - 440us/step
Epoch 74/100
94/94 - 0s - loss: 0.0020 - 40ms/epoch - 425us/step
Epoch 75/100
94/94 - 0s - loss: 0.0020 - 43ms/epoch - 453us/step
Epoch 76/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 446us/step
Epoch 77/100
94/94 - 0s - loss: 0.0020 - 43ms/epoch - 453us/step
Epoch 78/100
94/94 - 0s - loss: 0.0021 - 43ms/epoch - 456us/step
Epoch 79/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 432us/step
Epoch 80/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 436us/step
Epoch 81/100
94/94 - 0s - loss: 0.0020 - 40ms/epoch - 428us/step
Epoch 82/100
94/94 - 0s - loss: 0.0020 - 39ms/epoch - 412us/step
Epoch 83/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 434us/step
Epoch 84/100
94/94 - 0s - loss: 0.0021 - 40ms/epoch - 429us/step
Epoch 85/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 443us/step
Epoch 86/100
94/94 - 0s - loss: 0.0021 - 41ms/epoch - 431us/step
Epoch 87/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 442us/step
Epoch 88/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 439us/step
Epoch 89/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 438us/step
Epoch 90/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 439us/step
Epoch 91/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 439us/step
Epoch 92/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 435us/step
Epoch 93/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 436us/step
Epoch 94/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 438us/step
Epoch 95/100
94/94 - 0s - loss: 0.0020 - 42ms/epoch - 446us/step
Epoch 96/100
94/94 - 0s - loss: 0.0020 - 40ms/epoch - 431us/step
Epoch 97/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 432us/step
Epoch 98/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 438us/step
Epoch 99/100
94/94 - 0s - loss: 0.0020 - 41ms/epoch - 434us/step
Epoch 100/100
94/94 - 0s - loss: 0.0019 - 40ms/epoch - 428us/step
Out[12]:
<keras.callbacks.History at 0x1550a0af0>
In [13]:
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))

# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
# shift test predictions for plotting
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
# plot baseline and predictions
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()
3/3 [==============================] - 0s 748us/step
2/2 [==============================] - 0s 1ms/step