Here you can find the code listings in R
language from the corresponding chapter of the book.
You need to load Rssa
, ssabook
, lattice
, latticeExtra
, plyr
, fma
to run these examples.
#minimal LRR
x <- 1.01^(1:10)
s <- ssa(x, L = 2)
l <- lrr(s, groups = list(1))
print(l)
print(roots(l))
#extraneous roots
x <- 1.01^(1:10)
s <- ssa(x, L = 6)
l <- lrr(s, groups = list(1))
r <- roots(l)
plot(l)
#multiple roots
x <- 2.188 * (1:10) + 7.77
s <- ssa(x, L = 3)
l <- lrr(s, groups = list(1:2))
print(l)
print(roots(l))
Exponential signal: One signal and four extraneous roots.
[1] 1.01
attr(,"class")
[1] "lrr"
[1] 1.01
[1] -1 2
attr(,"class")
[1] "lrr"
[1] 1.0000003 0.9999997
# Decompose "co2" series with default window length L
s <- ssa(co2)
# Estimate the periods from 2nd and 3rd eigenvectors
# using default "pairs" method
print(parestimate(s, groups = list(c(2, 3)), method = "pairs"))
# Estimate the periods and rates using ESPRIT
pe <- parestimate(s, groups = list(1:6),
method = "esprit")
print(pe)
plot(pe)
‘CO2’: Six signal roots.
period rate | Mod Arg | Re Im
11.995 0.000000 | 1.00000 0.52 | 0.86592 0.50019
period rate | Mod Arg | Re Im
11.995 0.000542 | 1.00054 0.52 | 0.86638 0.50047
-11.995 0.000542 | 1.00054 -0.52 | 0.86638 -0.50047
5.999 0.000512 | 1.00051 1.05 | 0.50015 0.86653
-5.999 0.000512 | 1.00051 -1.05 | 0.50015 -0.86653
Inf 0.000375 | 1.00037 0.00 | 1.00037 0.00000
Inf -0.008308 | 0.99173 -0.00 | 0.99173 -0.00000
# Decomposition stage
s <- ssa(co2, L = 120)
# Recurrent forecast, the result is the forecast values only
# The result is the set of forecasts for each group
for1 <- rforecast(s, groups = list(1, c(1,4), 1:4, 1:6),
len = 12)
matplot(data.frame(for1), type = "b",
pch = c("1", "2", "3", "4"), ylab = "")
# Vector forecast, the forecasted points are
# added to the base series
for1a <- vforecast(s,
groups = list(1, trend = c(1,4), 1:4, 1:6),
len = 36, only.new = FALSE)
# Plot of the forecast based on the second group c(1,4)
plot(cbind(co2, for1a$trend), plot.type = "single",
col = c("black", "red"), ylab = NULL)
# Reverse recurrent forecast
len <- 60
for2 <- rforecast(s, groups = list(1:6), len = len,
only.new = TRUE, reverse = TRUE)
initial <- c(rep(NA, len), co2)
forecasted <- c(for2, rep(NA, length(co2)))
matplot(data.frame(initial, forecasted), ylab = NULL,
type = "l", col = c("black", "red"), lty = c(1, 1))
set.seed(3)
for3 <- forecast(s, groups = list(1:6),
method = "recurrent", interval = "confidence",
only.intervals = FALSE,
len = 24, R = 100, level = 0.99)
plot(for3, include = 36, shadecols = "green", type = "l",
main = "Confidence intervals")
set.seed(3)
for4 <- forecast(s, groups = list(1:6),
method = "recurrent", interval = "prediction",
only.intervals = FALSE,
len = 24, R = 100, level = 0.99)
plot(for4, include = 36, shadecols = "green", type = "l",
main = "Prediction intervals")
‘CO2’: A set of recurrent forecasts.
‘CO2’: Forecast of trend.
‘CO2’: Backward forecast of the signal.
F <- co2
F[201:300] <- NA
s <- ssa(F, L = 72)
g0 <- gapfill(s, groups = list(c(1, 4)), method = "sequential",
alpha = 0, base = "reconstructed")
g1 <- gapfill(s, groups = list(c(1, 4)), method = "sequential",
alpha = 1, base = "reconstructed")
g <- gapfill(s, groups = list(c(1, 4)), method = "sequential",
base = "reconstructed")
plot(co2, col = "black")
lines(g0, col = "blue", lwd = 2)
lines(g1, col = "green", lwd = 2)
lines(g, col = "red", lwd = 2)
‘CO2’: Subspace-based gap filling, from the left,
from the right, and their combination.
F <- co2
F[201:300] <- NA
is <- ssa(F, L = 72)
ig <- igapfill(is, groups = list(c(1,4)),
base = "reconstructed")
igo <- igapfill(is, groups = list(c(1,4)),
base = "original")
# Compare the result
plot(co2, col="black")
lines(ig, col = "blue", lwd = 1)
lines(igo, col = "red", lwd = 1)
ig1 <- igapfill(is, groups = list(c(1, 4)),
base = "original", maxiter = 1)
ig5 <- igapfill(is, groups = list(c(1, 4)), fill = ig1,
base = "original", maxiter = 4)
ig10 <- igapfill(is, groups = list(c(1, 4)), fill = ig5,
base = "original", maxiter = 5)
init.lin <- F
init.lin[200:301] <- F[200] + (0:101) / 101 * (F[301] - F[200])
ig.lin <- igapfill(s,
fill = init.lin,
groups = list(c(1, 4)),
base = "original", maxiter = 10)
# Compare the result
plot(co2, col = "black")
lines(ig1, col = "green", lwd = 1)
lines(ig5, col = "blue", lwd = 1)
lines(ig10, col = "red", lwd = 1)
lines(ig.lin, col = "darkred", lwd = 1)
‘CO2’: Iterative gap filling of trend.
‘CO2’: Iterative gap filling of trend: convergence.
F <- co2
loc <- c(11:17, 61:67, 71:77, 101:107)
F[loc] <- NA;
sr <- ssa(F, L = 200)
igr <- igapfill(sr, groups = list(c(1:6)), fill = 320,
base = "original", maxiter = 10)
gr <- gapfill(sr, groups = list(c(1:6)),
method = "simultaneous", base = "original")
G <- rep(NA, length(F)); G[loc] = gr[loc]
print(mean((gr[loc] - co2[loc])^2)) #MSE of gapfill
print(mean((igr[loc] - co2[loc])^2)) #MSE of igapfill
xyplot(igr + G + F ~ time(co2), type = "l",
lwd = c(1, 2, 1), ylab = NULL,
auto.key = list(lines = TRUE, points = FALSE,
text = c("igapfill", "gapfill", "series")))
[1] 0.1132225
[1] 0.1425962
‘CO2’: Iterative and simultaneous subspace-based gap filling of trend: randomly located gaps.
cut <- 49 + 60
x <- window(co2, end = time(co2)[length(co2) - cut + 1])
L <- 60
K <- length(x) - L + 1
alpha <- 0.01
weights <- vector(len = K)
weights[1:K] <- alpha
weights[seq(K, 1, -L)] <- 1
xyplot(weights ~ 1:K, type = "l")
s1 <- ssa(x, L = L) #to detect the series rank
ncomp <- 6
s01 <- ssa(x, L = L, column.oblique = "identity",
row.oblique = weights)
c01 <- cadzow(s01, rank = ncomp, maxiter = 10)
s01.1 <- ssa(c01, L = L, column.oblique = NULL,
row.oblique = NULL)
c01.1 <- cadzow(s01.1, rank = ncomp, tol = 1.e-8 * mean(co2))
print(t(ssa(c01.1, L = ncomp + 1)$sigma), digits = 5)
ss01.1<- ssa(c01.1, L = ncomp + 1)
fr <- rforecast(ss01.1, groups = list(1:ncomp), len = cut)
xyplot(cbind(Original = co2, Cadzow1and01 = c01.1,
ForecastCadzow = fr), superpose = TRUE)
print(parestimate(ss01.1, groups = list(1:ncomp),
method = "esprit"))
‘CO2’: Approximation of rank 6 by the weighted Cadzow method and its forecast.
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] 16472 73.537 41.096 12.29 4.674 0.044457 0
period rate | Mod Arg | Re Im
11.998 0.000525 | 1.00053 0.52 | 0.86643 0.50035
-11.998 0.000525 | 1.00053 -0.52 | 0.86643 -0.50035
Inf 0.000451 | 1.00045 0.00 | 1.00045 0.00000
5.998 0.000287 | 1.00029 1.05 | 0.49986 0.86644
-5.998 0.000287 | 1.00029 -1.05 | 0.49986 -0.86644
Inf -0.004623 | 0.99539 0.00 | 0.99539 0.00000
Warning: this example takes a lot of computational time.
SIMUL <- FALSE
set.seed(3)
L <- 20
N <- 2 * L
K <- N - L + 1
alpha <- 0.01
sigma <- 1
signal <- 5 * sin(2 * pi * (1:N) / 6)
weights <- vector(len = K)
weights[1:K] <- alpha
weights[seq(1, K, L)] <- 1
M <- 1000
norm.meansq <- function(x) mean(x^2)
if (SIMUL) {
RMSE <- sqrt(rowMeans(replicate(M, {
x <- signal + sigma * rnorm(N)
s.alpha <- ssa(x, L = L, column.oblique = NULL,
row.oblique = weights)
c.alpha <- cadzow(s.alpha, rank = 2, tol = 1.e-8,
norm = norm.meansq,
correct = FALSE)
s <- ssa(x, L = L)
cc <- cadzow(s, rank = 2, norm = norm.meansq, tol = 1.e-8,
correct = FALSE)
c("err" = mean((cc - signal)^2),
"err.alpha" = mean((c.alpha - signal)^2))
})))
cadzow.sim <- as.data.frame(t(RMSE))
} else {
data("cadzow.sim", package = "ssabook")
}
print(cadzow.sim)
err err.alpha
1 0.3753331 0.3222088
data("elec", package = "fma")
N <- length(elec)
len <- 24
L <- 24
s <- ssa(window(elec, end = c(1993, 8)), L = L)
si <- iossa(s, nested.groups = list(c(1, 4), c(2, 3, 5:10)))
fi <- rforecast(si, groups = list(trend = c(1:2)),
len = len, only.new = FALSE)
s0 <- ssa(window(elec, start = c(1972, 8), end = c(1993, 8)),
L = L)
f0 <- vforecast(s0, groups = list(trend = c(1)),
len = len, only.new = TRUE)
si0 <- iossa(s0, nested.groups = list(c(1,4), c(2,3,5:10)))
fi0 <- vforecast(si0, groups = list(trend = c(1:2)),
len = len, only.new = TRUE)
theme <- simpleTheme(col = c("black", "red", "blue", "green"),
lwd = c(1, 1, 2, 2),
lty = c("solid", "solid",
"solid", "dashed"))
xyplot(cbind(elec,
window(fi, end = c(1993, 8)),
window(fi, start = c(1993, 9)),
fi0),
superpose = TRUE, type ="l", ylab = NULL, xlab = NULL,
auto.key = list(text = c("original", "trend",
"forecast", "forecast0"),
type = c("l", "l", "l", "l"),
lines = TRUE, points = FALSE),
par.settings = theme)
‘Elec’: Trend forecasting.
L <- 240
elec_sa <- elec - fi
s_sa <- ssa(window(elec_sa, end = c(1993, 8)), L = L)
f_sa <- rforecast(s_sa, groups = list(trend = c(1:13)),
len = len, only.new = FALSE)
theme <- simpleTheme(col = c("black", "red", "green"),
lwd = c(1, 2, 2),
lty = c("solid", "solid", "solid"))
xyplot(cbind(window(elec, start = c(1985, 12)),
window(fi, start = c(1985, 12), end = c(1993, 8)),
window(fi, start = c(1993, 9)) +
window(f_sa, start = c(1993, 9))),
superpose = TRUE, type = "l", ylab = NULL, xlab = NULL,
auto.key = list(text = c("original", "trend",
"forecast"),
type = c("l", "l", "l"),
lines = TRUE, points = FALSE),
par.settings = theme)
‘Elec’: Combined forecasting.
data("cowtemp", package = "fma")
series <- cowtemp
N <- length(series)
cut <- 14
future <- 21
len <- cut + future
r <- 1
L <- 28
Lt <- 14
s <- ssa(window(series, end = N - cut), L = L)
parestimate(s, groups = list(trend = c(1:r)),
method = "esprit")$moduli
roots(lrr(s, groups = list(trend = c(1:r))))[1]
rec <- reconstruct(s, groups = list(1:r))
st <- ssa(window(series, end = N - cut),
kind = "toeplitz-ssa", L = Lt)
parestimate(st, groups = list(trend = c(1:r)),
method = "esprit")$moduli
parestimate(st, groups = list(trend = c(1:r)),
normalize.roots = FALSE,
method = "esprit")$moduli
roots(lrr(st, groups = list(trend = c(1:r))))[1]
fr <- rforecast(s, groups = list(trend = c(1:r)),
len = len, only.new = TRUE)
fv <- vforecast(s, groups = list(trend = c(1:r)),
len = len, only.new = FALSE)
ftr <- rforecast(st, groups = list(trend = c(1:r)),
len = len, only.new = FALSE)
print(sqrt(mean((window(fr, start = 62) -
window(series, start = 62))^2)))
print(sqrt(mean((window(fv, start = 62) -
window(series, start = 62))^2)))
print(sqrt(mean((window(ftr, start = 62) -
window(series, start = 62))^2)))
theme <- simpleTheme(col = c("black", "red", "blue",
"green", "violet"),
lwd = c(1, 1, 2, 2, 1),
lty = c("solid", "solid", "solid",
"dashed", "solid"))
future.NA <- rep(NA, future)
xyplot(cbind(series, rec$F1, fr, fv, ftr),
superpose = TRUE, type = "l", ylab = NULL, xlab = NULL,
auto.key = list(text = c("original series",
"reconstructed series",
"recurrent forecast",
"vector forecast",
"recurrent Toeplitz forecast"),
type = c("l", "l", "l", "l", "l"),
lines = TRUE, points = FALSE),
par.settings = theme)
[1] 5.711485
[1] 5.253602
[1] 4.785783
‘Cowtemp’: Basic SSA and Toeplitz SSA forecasting.
perturbation <-
function(s, noise, R, Qfor, num.comp, L, level, template) {
r <- reconstruct(s, groups = list(1:num.comp))
stopifnot(length(r) == 1)
delta <- sd(residuals(r))
res <- matrix(nrow = Qfor, ncol = R)
ser <- s$F; attributes(ser) <- s$Fattr
for (j in 1:R) {
si <- ssa(ser + delta * noise[, j], L = L)
res[, j] <- rforecast(si, groups = list(1:num.comp),
len = Qfor)
}
cf <- apply(res, 1, quantile,
probs = c((1 - level) / 2, (1 + level) / 2))
out <- template
out$x[] <- ser
out$fitted[] <- r[[1]]
out$residuals[] <- residuals(r)
out$lower[] <- cf[1, ]
out$upper[] <- cf[2, ]
out$level[] <- 100 * level
out$mean[] <- rowMeans(res)
out
}
data("AustralianWine", package = "Rssa")
wine <- window(AustralianWine, end = time(AustralianWine)[174])
ser0 <- wine[, "Total"]
Q <- 66
l <- length(ser0)
ser <- window(ser0, end = time(ser0)[l-Q])
include <- min(1000, l - Q)
L <- 48
s <- ssa(ser, L = L)
plot(wcor(s, groups = 1:min(nu(s), 50)),
scales = list(at = c(10, 20, 30, 40, 50)))
set.seed(1)
R <- 100
noise <- matrix(rnorm(length(ser) * R), nrow = length(ser))
range <- 1:30
err.pert <- numeric(length(range))
err <- numeric(length(range))
k <- 1
for (num.comp in range) {
bf0 <- forecast(s, groups = list(1:num.comp),
method = "recurrent",
interval = "confidence",
len = Q, R = R, level = 0.9)
bf0.pert <- perturbation(s, noise, R, Q, num.comp, L,
level = 0.9, bf0)
err.pert[k] <- sqrt(mean((bf0.pert$upper - bf0.pert$lower)^2))
err[k] <- sqrt(mean((bf0$upper - bf0$lower)^2))
k <- k + 1
}
bf0.pert1 <- perturbation(s, noise, R, Q, 1, L,
level = 0.9, bf0)
plot(bf0.pert1, include = include, shadecols = "green",
main = paste("Perturbed SSA forecast, 1 component"))
bf0.pert12 <- perturbation(s, noise, R, Q, 12, L,
level = 0.9, bf0)
plot(bf0.pert12, include = include, shadecols = "green",
main = paste("Perturbed SSA forecast, 12 components"))
bf0.pert14 <- perturbation(s, noise, R, Q, 14, L,
level = 0.9, bf0)
plot(bf0.pert14, include = include, shadecols = "green",
main = paste("Perturbed SSA forecast, 14 components"))
start <- 48
theme <- simpleTheme(col = c("black","red","blue"),
lwd = c(2, 1, 2),
lty = c("solid", "solid", "solid"))
xyplot(cbind(window(ser0, start = c(1984, 1)),
bf0.pert12$mean,
bf0.pert14$mean),
superpose = TRUE, type = "l", ylab = NULL, xlab = NULL,
auto.key = list(text = c("`Total'",
"forecast, ET1-12",
"forecast, ET1-14"),
type = c("l", "l", "l"),
lines = TRUE, points = FALSE),
par.settings = theme)
xyplot(err + err.pert ~ range, type = "l",
ylab = NULL, xlab = NULL,
auto.key = list(text = c("Bootstrap errors",
"Perturbation errors"),
type = c("l", "l"),
lines = TRUE, points = FALSE),
scales = list(y = list(log = TRUE)))
‘Total’: w-Correlations.
‘Total’: Sizes of 90% forecasting intervals in dependence on the number of components.
‘Total’: Perturbed forecasting intervals, ET1.
‘Total’: Perturbed forecasting intervals, ET1–12.
‘Total’: Perturbed forecasting intervals, ET1–14.
‘Total’: Comparison of forecasts by ET1–12 and ET1–14.
data("g15", package = "ssabook")
xyplot(g15 ~ 1:length(g15), type = "l",
ylab = NULL, xlab = NULL)
range1 <- 14950:15050
g15_short <- g15[range1]
g15_un <- na.omit(as.vector(g15))
g15_un_short <- g15_un[range1]
p1 <- xyplot(g15_short ~ range1, type = "l",
ylab = NULL, xlab = NULL)
p2 <- xyplot(g15_un_short ~ range1, type = "l",
ylab = NULL, xlab = NULL)
plot(p1, split = c(1, 1, 2, 1), more = TRUE)
plot(p2, split = c(2, 1, 2, 1), more = FALSE)
L <- 72
neig <- min(L, 100)
s <- ssa(g15, L = 72, neig = neig)
plot(s, type = "vectors", idx = 1:8, plot.contrib = FALSE)
g <- gapfill(s, groups = list(1:2))
xyplot(g[range1] + g15[range1] ~ range1, type = "l",
ylab = NULL, xlab = NULL,
par.settings = simpleTheme(col = c("red", "black")))
spec.pgram(g15_un, detrend = FALSE, log = "no",
xlim = c(0.00, 0.02), ylim = c(0, 1e-14),
main = "", sub = "")
axis(1, at = c(1/144, 1/72), labels = c("1/144", "1/72"),
las = 2)
spec.pgram(as.vector(g), detrend = FALSE, log = "no",
xlim = c(0.00, 0.02), ylim = c(0, 1e-14),
main = "", sub = "")
axis(1, at = c(1/144, 1/72), labels = c("1/144", "1/72"),
las = 2)
‘Glonass’: Initial series with gaps.
‘Glonass’: A subseries with a gap (left) and with the suppressed gap (right).
‘Glonass’: Eigenvectors for the series with gaps, L=72.
‘Glonass’: A subseries with the filled gap.
‘Glonass’: Periodogram of the series with suppressed gaps.
‘Glonass’: Periodogram of the series with filled gaps.
s_filled = ssa(g, L = 52416, neig = 100)
pg <- vector(length = 99)
for (i in 1:99) {
pg[i] <- parestimate(s_filled, groups = list(i:(i + 1)),
method = "esprit")$period[1]
}
print(ind <- which(pg < 1/0.003))
print(pg[ind], digits = 0)
r <- reconstruct(s_filled, groups = list(day = c(ind, ind+1)))
xyplot(r$day[1:720] ~ 1:720, type = "l",
ylab = NULL, xlab = NULL)
‘Glonass’: A subseries with the 12-hours periodicity; it is extracted from the series with filled gaps and L=52416.
[1] 32 58
[1] 144 72
wine <- window(AustralianWine, end = time(AustralianWine)[168])
ser <- wine[, "Fortified"]
N <- length(ser)
L <- 84
K <- N - L + 1
rank <- 11
#Basic SSA
s0 <- ssa(ser, L = L)
r0 <- reconstruct(s0, groups = list(signal = 1:rank))$signal
#Cadzow iterations with series weights close to equal.
alpha <- 0.1
weights <- numeric( K)
weights[1:K] <- alpha
weights[seq(from = K, to = 1, by = -L)] <- 1
s <- ssa(ser, L = L, column.oblique = "identity",
row.oblique = weights, decompose.force = FALSE)
c <- cadzow(s, rank = rank)
sc <- ssa(c, L = rank + 1)
rc <- reconstruct(sc, groups = list(signal = 1:rank))$signal
xyplot(cbind(rc, ser), type = "l",
superpose = TRUE,
auto.key = list(text = c("Reconstructed",
"Initial"),
type = c("l", "l"),
lines = TRUE, poinwts = FALSE))
‘FORT’: Approximation by a series of finite rank.
#Estimation by means of the first iteration of Cadzow
#iterations (SSA)
par <- parestimate(s0, groups = list(1:rank),
method = "esprit")
print(par)
o <- order(abs(par$periods), decreasing = TRUE)
periods <- (par$periods[o])
moduli <- par$moduli[o]
len <- rank
vars <- matrix(nrow = len, ncol = rank)
for (i in 1:rank) {
if (periods[i] == Inf)
vars[, i] <- moduli[i]^(1:len)
else if (periods[i] == 2)
vars[, i] <- (-moduli[i])^(1:len)
else if (periods[i] > 0)
vars[, i] <-
moduli[i]^(1:len) * sin(2 * pi * (1:len) / periods[i])
else
vars[, i] <-
moduli[i]^(1:len) * cos(2 * pi * (1:len) / periods[i])
}
lm0 <- lm(r0[1:len] ~ 0 + ., data = data.frame(vars))
coefs0 <- coef(lm0)
print(round(coefs0[1:6], digits = 2))
print(round(coefs0[7:11], digits = 2))
period rate | Mod Arg | Re Im
5.972 0.004238 | 1.00425 1.05 | 0.49781 0.87218
-5.972 0.004238 | 1.00425 -1.05 | 0.49781 -0.87218
2.388 0.001744 | 1.00175 2.63 | -0.87403 0.48945
-2.388 0.001744 | 1.00175 -2.63 | -0.87403 -0.48945
4.000 0.000359 | 1.00036 1.57 | -0.00008 1.00036
-4.000 0.000359 | 1.00036 -1.57 | -0.00008 -1.00036
Inf -0.003318 | 0.99669 0.00 | 0.99669 0.00000
12.006 -0.005931 | 0.99409 0.52 | 0.86104 0.49680
-12.006 -0.005931 | 0.99409 -0.52 | 0.86104 -0.49680
3.015 -0.011285 | 0.98878 2.08 | -0.48570 0.86127
-3.015 -0.011285 | 0.98878 -2.08 | -0.48570 -0.86127
X1 X2 X3 X4 X5 X6
3969.23 -717.10 -927.57 105.52 137.98 -287.11
X7 X8 X9 X10 X11
215.64 -254.51 -205.12 90.44 10.95
#Estimation by means of the limit of Cadzow iterations
parc <- parestimate(sc, groups = list(1:rank),
method = "esprit")
print(parc)
oc <- order(abs(parc$periods), decreasing = TRUE)
periodsc <- (parc$periods[oc])
modulic <- parc$moduli[oc]
lenc <- rank
varsc <- matrix(nrow = lenc, ncol = rank)
for (i in 1:rank) {
if (periodsc[i] == Inf)
varsc[, i] <- modulic[i]^(1:lenc)
else if (periodsc[i] == 2)
varsc[, i] <- (-modulic[i])^(1:lenc)
else if (periodsc[i] > 0)
varsc[, i] <-
modulic[i]^(1:lenc) * sin(2 * pi * (1:lenc) / periodsc[i])
else
varsc[, i] <-
modulic[i]^(1:lenc) * cos(2 * pi * (1:lenc) / periodsc[i])
}
lm.c <- lm(rc[1:lenc] ~ 0 + ., data = data.frame(varsc))
#lm.c
coefs.c <- coef(lm.c)
print(round(coefs.c[1:6], digits = 2))
print(round(coefs.c[7:11], digits = 2))
period rate | Mod Arg | Re Im
5.975 0.004986 | 1.00500 1.05 | 0.49863 0.87258
-5.975 0.004986 | 1.00500 -1.05 | 0.49863 -0.87258
2.389 0.003402 | 1.00341 2.63 | -0.87506 0.49102
-2.389 0.003402 | 1.00341 -2.63 | -0.87506 -0.49102
3.998 0.000311 | 1.00031 1.57 | -0.00076 1.00031
-3.998 0.000311 | 1.00031 -1.57 | -0.00076 -1.00031
Inf -0.003356 | 0.99665 0.00 | 0.99665 0.00000
12.009 -0.005985 | 0.99403 0.52 | 0.86105 0.49669
-12.009 -0.005985 | 0.99403 -0.52 | 0.86105 -0.49669
3.018 -0.010620 | 0.98944 2.08 | -0.48394 0.86301
-3.018 -0.010620 | 0.98944 -2.08 | -0.48394 -0.86301
X1 X2 X3 X4 X5 X6
4005.56 -721.77 -940.64 68.30 184.45 -269.52
X7 X8 X9 X10 X11
325.92 -251.10 -255.41 154.28 61.90
idx <- seq(2, 11, 2)
coefs.c.phase <- numeric(length(idx))
phases.c <- numeric(length(idx))
periods.c.phase <- numeric(length(idx))
moduli.c.phase <- numeric(length(idx))
for (i in seq_along(idx)) {
periods.c.phase[i] <- periodsc[idx[i]]
moduli.c.phase[i] <- modulic[idx[i]]
coefs.c.phase[i] <- sqrt(coefs.c[idx[i]]^2 +
coefs.c[idx[i] + 1]^2)
phases.c[i] <- atan2(coefs.c[idx[i] + 1], coefs.c[idx[i]])
}
print("trend:")
print("coefficient * modulus^n")
print(data.frame(coefficients = coefs.c[1],
moduli = modulic[1]))
print("periodics:")
print("coefficient * modulus^n * cos(2 * pi* n/period + phase)")
print(data.frame(periods = periods.c.phase, phases = phases.c,
coefficients = coefs.c.phase,
moduli = moduli.c.phase))
[1] "trend:"
[1] "coefficient * modulus^n"
coefficients moduli
X1 4005.561 0.9966493
[1] "periodics:"
[1] "coefficient * modulus^n * cos(2 * pi* n/period + phase)"
periods phases coefficients moduli
1 12.008804 -2.225294 1185.6458 0.9940328
2 5.974637 1.216171 196.6835 1.0049988
3 3.998061 2.261753 422.9253 1.0003111
4 3.018060 -2.347688 358.1681 0.9894363
5 2.388825 0.381569 166.2372 1.0034081
data("sunspot2", package = "ssabook")
s <- ssa(sunspot2, L = 11)
r <- reconstruct(s, groups = list(Trend = 1))
plot(r, plot.method = "xyplot", superpose = TRUE)
sun.oscill <- residuals(r)
N <- length(sun.oscill)
rank <- 2
periods <- function(M, L) {
ts(sapply(1:(N - M),
function (i) {
s <- ssa(sun.oscill[i:(i + M - 1)], L = L)
par <- parestimate(s, groups = list(c(1:rank)),
method = "esprit")
abs(par$periods[1])
}),
start = time(sunspot2)[M + 1], delta = 1)
}
per22 <- periods(22, 11)
per44 <- periods(44, 22)
xyplot(cbind(per22, per44), type = "l", xlim = c(1677, 2040),
strip = strip.custom(factor.levels =
c("B = 22", "B = 44")))
M <- 22; L <- M / 2
hm <- hmatr(sun.oscill, B = M, T = M, L = L, neig = rank)
plot(hm)
M <- 44; L <- M / 2
hm <- hmatr(sun.oscill, B = M, T = M, L = L, neig = rank)
plot(hm)
‘Sunspots’: Trend extraction (top), subspace tracking of residuals
with B=22 (middle) and B=44 (bottom).
library("plyr")
forecast.mse <- function(x, F.check,
forecast.len = 1, ...) {
stopifnot(length(F.check) == forecast.len)
f <- forecast(x, h = forecast.len, ...)
mean((f$mean - F.check)^2)
}
forecast.sliding.mse <- function(F,
L, ncomp,
forecast.len = 1,
K.sliding = N %/% 4,
.progress = "none",
.parallel = FALSE,
...) {
N <- length(F)
sliding.len <- N - K.sliding - forecast.len + 1
L.max = max(L); L.min = min(L); ncomp.max = max(ncomp)
stopifnot(sliding.len > L.max)
stopifnot(ncomp.max + 1 < min(L.min, N - L.max + 1))
g <- expand.grid(L = L, i = 1:K.sliding)
aaply(g, 1,
splat(function(L, i) {
F.train <- F[seq(from = i, len = sliding.len)]
F.check <- F[seq(from = sliding.len + i,
len = forecast.len)]
s <- ssa(F.train, L = L)
sapply(ncomp,
function(ncomp) {
res <- forecast.mse(s, F.check,
forecast.len =
forecast.len,
groups =
list(1:ncomp),
...)
names(res) <- as.character(ncomp)
res
})
}),
.progress = .progress, .parallel = .parallel)
}
optim.par <- function(m0) {
m <- apply(m0, c(1, 3), mean)
mpos <- which(m == min(m), arr.ind = TRUE)
L.opt <- Ls[mpos[1]]
ncomp.opt <- ncomp[mpos[2]]
list(L.opt = L.opt, ncomp.opt = ncomp.opt, m = m)
}
data("bookings", package = "ssabook")
K.sliding <- 2
forecast.base.len <- 2*frequency(bookings)
base.len <- length(bookings)
sliding.len <- base.len - K.sliding - forecast.base.len + 1
print(sliding.len)
ncomp <- 1:100
L.min <- frequency(bookings)
Ls <- seq(L.min, 10*L.min, by = frequency(bookings))
m0 <- forecast.sliding.mse(bookings,
K.sliding = K.sliding,
L = Ls, ncomp = ncomp,
method = "recurrent",
forecast.len = forecast.base.len,
.progress = "none")
p <- optim.par(m0)
print(c(p$L.opt, p$ncomp.opt, sqrt(min(p$m))))
matplot(Ls, sqrt(p$m), ylab = "", xlab = "Window lengths",
type = "l", col = topo.colors(100))
‘Bookings’: Dependence of RMSE on L for different numbers of components.
forecast.len <- 2*frequency(bookings)
ssa.obj <- ssa(bookings, L = p$L.opt)
ssa.for <- rforecast(ssa.obj, groups = list(1:p$ncomp.opt),
len = forecast.len)
xyplot(cbind(bookings, ssa.for),
type = "l", superpose = TRUE)
xyplot(cbind(bookings, ssa.for), type = "l",
superpose = TRUE, xlim = c(21,29))
‘Bookings’: Forecast with optimal parameters.
‘Bookings’: Forecast with optimal parameters for last points.
name <- "Sweetwhite"
wine <- window(AustralianWine, end = time(AustralianWine)[174])
series <- wine[, name]
set.seed(1)
forecast.len <- 12
base.len <- length(series) - forecast.len
F.base <- window(series,
end = time(series)[base.len]) # training
F.new <- window(series,
start = time(series)[base.len + 1]) # test
K.sliding <- 12
forecast.base.len <- 2
ns <- base.len - K.sliding - forecast.base.len + 1
ncomp <- 1:15
L.min <- 24
Ls <- seq(L.min, ns - L.min, by = 12)
method <- "recurrent"
m0 <- forecast.sliding.mse(F.base, K.sliding = K.sliding,
L = Ls, ncomp = ncomp,
method = method,
forecast.len = forecast.base.len)
p <- optim.par(m0)
print(c(p$L.opt, p$ncomp.opt, sqrt(min(p$m))))
#these parameters provides the best forecast
#L.opt <- 132; ncomp.opt <- 13
# SSA forecast
F.base.short <-
window(F.base, start =
time(series)[K.sliding + forecast.base.len])
ssa.obj <- ssa(F.base.short, L = p$L.opt)
ssa.for <- forecast(ssa.obj,
groups = list(1:p$ncomp.opt),
method = method, h = forecast.len,
interval = "prediction",
level=c(0.8, 0.95))
err.ssa <- (ssa.for$mean - F.new)^2
#ARIMA forecast
sarima.fit <- auto.arima(F.base, trace = FALSE,
lambda = 0, stepwise = FALSE)
sarima.for <- forecast(sarima.fit, h = forecast.len)
err.sarima <- (sarima.for$mean - F.new)^2
#ETS forecast
ets.fit <- ets(F.base)
ets.for <- forecast(ets.fit, h = forecast.len)
err.ets <- (ets.for$mean - F.new)^2
#models
print(sarima.fit)
print(ets.fit)
print(c("SSA(L,r)", p$L.opt, p$ncomp.opt))
#RMSE for test periods
print(c("ssa", sqrt(mean(err.ssa))))
print(c("sarima", sqrt(mean(err.sarima))))
print(c("ets", sqrt(mean(err.ets))))
#plot of forecasts with confidence intervals
plot(ets.for); lines(series,col="black");
plot(sarima.for); lines(series,col="black");
plot(ssa.for); lines(series,col="black");
Series: F.base
ARIMA(1,1,0)(2,0,0)[12]
Box Cox transformation: lambda= 0
Coefficients:
ar1 sar1 sar2
-0.4165 0.4847 0.2123
s.e. 0.0722 0.0765 0.0813
sigma^2 estimated as 0.03897: log likelihood=30.78
AIC=-53.55 AICc=-53.29 BIC=-41.22
ETS(M,N,M)
Call:
ets(y = F.base)
Smoothing parameters:
alpha = 0.5571
gamma = 1e-04
Initial states:
l = 123.5798
s=1.4262 1.2408 1.0236 1.0546 1.1188 1.0852
0.7795 0.8136 0.8903 0.8834 0.8241 0.8598
sigma: 0.1732
AIC AICc BIC
2026.600 2029.887 2072.913
[1] "SSA(L,r)" "108" "8"
[1] "ssa" "55.3572366330605"
[1] "sarima" "60.1534718580799"
[1] "ets" "54.2338009066311"
‘Sweetwhite’: ETS forecast with optimal parameters.
‘Sweetwhite’: ARIMA forecast with optimal parameters.
‘Sweetwhite’: SSA forecast with optimal parameters.