PACF Simulation - Learning Notes

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import numpy as np
import matplotlib.pyplot as plt

# we will consider a time series and plot its 
# sample acf and pacf functions
# x_{t} = 1.5x_{t-1} + -0.75x_{t-2} + w_{t}
# where w is white noise with 0 mean and constant variance

n = 1000 # no of points to simulate
w = np.random.randn(n)
x = np.zeros(n)
for i in range(n):
    x[i] = 1.5*x[i-1] - 0.75*x[i-2] + w[i]
x = x[2:]
n -= 2
# all following calculations are concerened with 21 positive lags
"""
estimated acf can be calculated using the formula
\hat{\gamma}(h) = (1/n) \sum_{i=1}^{n-h}x_{i}x_{i+h}
and the function is symmetric around 0
"""

acf_sample = np.zeros(21)
# calculate gamma for positive lags only
for h in range(21):
    acf_sample[h] = (1.0/n) * np.dot(x[0:n-h], x[h:n])

# convert to rho
rho_0 = acf_sample[0]
for i in range(21):
    acf_sample[i] /= rho_0


"""
sample pacf can be obtained by regressing the time steps in
consideration on all the intermediate time steps
PACF(x_{t}, x_{t+h}) = Cov(x_{t} - \hat{x}_{t}, x_{t+h} - \hat{x}_{t+h})
\hat{x}_{t} = linear combination of (x_{t+1}, x_{t+2}, .., x_{t+h-1})
"""
pacf_sample = np.zeros(21)
# calculate gamma for positive lags only
for h in range(21):
    # for lag 0 and 1, the value is same as acf and already set above
    if(h > 1):
        # least squares solution to y = X \beta is 
        # \beta = (X.T X)^{-1} X.T y
        X = np.zeros((n-h,h-1))
        for i in range(1,h):
            X[:,i-1] = x[i:n-h+i]
        x0 = x[0:n-h].reshape((-1,1))
        xh = x[h:].reshape((-1,1))
        hatx0 = x0 - np.matmul(X, np.matmul(np.linalg.inv(np.matmul(X.T,X)), np.matmul(X.T, x0)))
        hatxh = xh - np.matmul(X, np.matmul(np.linalg.inv(np.matmul(X.T,X)), np.matmul(X.T, xh)))
        pacf_sample[h] = ((1.0/n) * np.matmul(hatx0.T, hatxh))/(np.sqrt(np.var(hatx0)*np.var(hatxh)))
    else:
        # pacf will be same as acf since no conditional values calculated
        pacf_sample[h] = acf_sample[h]


fig, axs = plt.subplots(3,1, figsize=(11,10))
# plot the time series
axs[0].plot(list(range(1,n+1)), x, color='blue')
axs[0].set_title('Time Series x(t) = 1.5x(t-1) - 0.75x(t-2) + w(t), AR(2)')
axs[0].set_xlabel('t')
axs[0].set_ylabel('x(t)')

# plot the sample acf
# axs[1].set_ylim((0,1))
axs[1].axhline(0, xmin=0, xmax=21, color='black')
for i in range(21):
    axs[1].plot([i,i], [0,acf_sample[i]], color='blue')
axs[1].scatter(list(range(21)), acf_sample, color='black')
axs[1].set_title('Estimated ACF')
axs[1].set_xlabel('Lag')
axs[1].set_ylabel('ACF')

# plot the sample pacf
# axs[1].set_ylim((0,1))
axs[2].axhline(0, xmin=0, xmax=21, color='black')
for i in range(21):
    axs[2].plot([i,i], [0,pacf_sample[i]], color='blue')
axs[2].scatter(list(range(21)), pacf_sample, color='black')
axs[2].set_title('Estimated PACF')
axs[2].set_xlabel('Lag')
axs[2].set_ylabel('PACF')

plt.tight_layout()
# plt.show()
fig.savefig('time.png')