Coupling from the Past
In the stats department at Warwick we have a reading group who are currently discussing Mark Huber‘s 2015 book, Perfect Simulation. Back in May, I presented the 3rd chapter on Coupling from the Past (CFTP; Propp & Wilson, 1996). The mono_cftp_Ising function below implements monotonic CFTP for the Ising model (equivalent to the Potts model with only q=2 states). This algorithm returns a single, unbiased sample from the Ising model for a given inverse temperature, β. When combined with the exchange algorithm (Murray, Ghahramani & MacKay, 2006), this enables exact posterior inference for β. However, problems can occur when the value of β is too large, since the underlying single-site Gibbs sampler can fail to converge.
Previously, I’ve compared the Gibbs sampler with Swendsen-Wang for the Potts model, as implemented in my R package bayesImageS. I showed that the Gibbs sampler exhibits torpid mixing when β is larger than the critical value, . This slowdown can be quantified using CFTP, since it provides an accurate estimate of how many iterations an MCMC algorithm takes to converge.
Below the critical point, runtime is less than a second for 25,200 iterations of random scan, single-site Gibbs updates (T=5 recursions). The coalescence time roughly doubles with every increase in β (log-linear). At β=0.88, the average runtime is around 6 seconds for between 100k and 800k iterations. This trend accelerates beyond the critical point, requiring almost an hour for up to 838 million iterations to converge. This is for an image with only n=400 pixels. This torpid mixing can play havoc with the exchange algorithm, as you can imagine. For example, see the numerical results reported by McGrory, Titterington, Reeves & Pettitt in their 2009 paper.
CFTP might not be all that useful in practice, but it forms the basis for more advanced algorithms such as the perfect slice sampler of Mira, Møller & Roberts (2001) or the bounding chain for Swendsen-Wang (Huber, 2003). My code for the perfect slice sampler in the gist below appears to have a bug, but mono_cftp_Ising should be working fine.
| set.seed(1234) | |
| # Exercise 3.1: simple symmetric random walk on {0,1,2} | |
| upd <- function(x, U) { | |
| x + ifelse(x < 2 && U > 0.5, 1, 0) – ifelse(x > 0 && U <= 0.5, 1, 0) | |
| } | |
| x <- 1 | |
| upd(x,0.25) | |
| upd(x,0.75) | |
| upd(upd(x,0.75),0.25) | |
| # complete coupling: simulating *forwards* | |
| x = 0 | |
| y = 1 | |
| z = 2 | |
| t = 0 | |
| while (x != y || y != z) { | |
| t <- t+1 | |
| u <- runif(1) | |
| print(paste(t,x,y,z,u)) | |
| x <- upd(x,u) | |
| y <- upd(y,u) | |
| z <- upd(z,u) | |
| } | |
| identical(x,y,z) | |
| # we actually need the time-reversal of this chain… | |
| # Coupling_from_the_past (pg. 46) | |
| cftp <- function(t, x0, A, upd) { | |
| U <- runif(t) | |
| x <- x0 | |
| print(U) | |
| if (!all(U > A[1,], U < A[2,])) { | |
| x <- cftp(t, x0, A, upd) | |
| } | |
| print(x) | |
| for (i in 1:t) { | |
| x <- upd(x,U[i]) | |
| } | |
| return(x) | |
| } | |
| # Example 3.2 (pg. 47) | |
| cftp(2, sample(0:2,1), matrix(c(0,0.5,0,0.5),nrow=2), upd) | |
| # Doubling_Coupling_from_the_past (pg. 49) | |
| # NOTE: since t is now variable-length, need to search for a subsequence | |
| # that satisfies A. Therefore, A is limited to an open interval | |
| # on [0,1]^t where the lower bound (and upper bound) is identical for | |
| # every dimension. This limit is imposed for computational convenience, | |
| # rather than any mathematical necessity. | |
| double_cftp <- function(t, x0, A, upd) { | |
| U <- runif(t) | |
| x <- x0 | |
| print(t) | |
| idx <- which(U > A[1] & U < A[2]) # search for subsequence | |
| if (sum(diff(idx) == 1) == 0) { | |
| x <- double_cftp(2*t, x0, A, upd) | |
| } | |
| for (i in 1:t) { | |
| x <- upd(x,U[i]) | |
| } | |
| return(x) | |
| } | |
| double_cftp(1, sample(0:2,1), c(0,0.5), upd) | |
| library(bayesImageS) | |
| Xmax <- matrix(1,nrow=2,ncol=2) | |
| Xmin <- matrix(–1,nrow=2,ncol=2) | |
| Xneigh <- getNeighbors(Xmax, neiStruc=c(2,2,0,0)) | |
| # single site update using random scan (pg. 18) | |
| Ising_Gibbs <- function(x, u, idx, beta, n) { | |
| neigh <- n[idx,] | |
| n1 <- sum(x[neigh] == 1) | |
| n2 <- sum(x[neigh] == –1) | |
| if (log(u) < beta*n1 – log(exp(beta*n1) + exp(beta*n2))) { | |
| x[idx] <- 1 | |
| } else { | |
| x[idx] <- –1 | |
| } | |
| return(x); | |
| } | |
| Ising_Gibbs(c(Xmax,0), 0.9, 2, 0.5, Xneigh) | |
| Ising_Gibbs(c(Xmin,0), 0.9, 2, 0.5, Xneigh) | |
| Ising_Gibbs(c(Xmax,0), 0.02, 2, 0.5, Xneigh) | |
| Ising_Gibbs(c(Xmin,0), 0.02, 2, 0.5, Xneigh) | |
| # Monotonic_coupling_from_the_past (pg. 52) | |
| mono_cftp_Ising <- function(t, x0, y0, beta, neigh) { | |
| print(t) | |
| U <- runif(t) | |
| I <- sample(1:nrow(neigh), t, replace = TRUE) | |
| for (i in 1:t) { | |
| x0 <- Ising_Gibbs(x0, U[i], I[i], beta, neigh) | |
| y0 <- Ising_Gibbs(y0, U[i], I[i], beta, neigh) | |
| } | |
| if (identical(x0, y0)) { | |
| return(x0) | |
| } else { | |
| x0 <- mono_cftp_Ising(2*t, x0, y0, beta, neigh) | |
| # read twice | |
| for (i in 1:t) { | |
| x0 <- Ising_Gibbs(x0, U[i], I[i], beta, neigh) | |
| } | |
| return(x0) | |
| } | |
| } | |
| system.time(X <- mono_cftp_Ising(length(Xmax), c(Xmin,0), c(Xmax,0), 0.5, Xneigh)) | |
| matrix(X[1:length(Xmin)],nrow=nrow(Xmin)) # drop the dummy state |
| # simple slice sampler (eg. 8.1 in Robert & Casella, 2003, pg. 323) | |
| f_x <- function(x) {0.5 * exp(– sqrt(x))} | |
| inv_fx <- function(y) {log(2*y)^2} | |
| sliceSamp <- function(niter, x0, f_x, inv_fx) { | |
| samp <- samp_y <- samp_u <- numeric(niter) | |
| samp[1] <- x0 | |
| samp_y[1] <- 0 | |
| samp_u[1] <- 0 | |
| for (it in 2:niter) { | |
| samp_u[it] <- runif(1) | |
| samp_y[it] <- samp_u[it] * f_x(samp[it–1]) | |
| u2 <- runif(1) | |
| samp[it] <- u2 * inv_fx(samp_y[it]) | |
| } | |
| list(x=samp, y=samp_y, u=samp_u) | |
| } | |
| # see Fig. 8.1 on pg. 324 | |
| x_0 <- rexp(1) | |
| s_samp <- sliceSamp(50000, x_0, f_x, inv_fx) | |
| hist(s_samp$x,col="lightgreen",breaks=100,freq=FALSE,main="",xlab="x") | |
| # eg. 8.2: truncated Gaussian | |
| f_x <- function(x) {(x >= 0) * exp(–0.5 * (x+3)^2) * (x <= 1)} | |
| inv_fx <- function(y) { | |
| x <- sqrt(–2 * log(y)) – 3 | |
| ifelse(x >= 0 && x <= 1, x, 1) | |
| } | |
| # see Fig. 8.2 on pg. 325 | |
| x_0 <- runif(1) | |
| nit <- 11 | |
| s_samp <- sliceSamp(nit, x_0, f_x, inv_fx) | |
| plot(s_samp$x, s_samp$y, ylim=c(0,f_x(0)), xlim=c(0,1), xlab="x", ylab="y") | |
| curve(exp(–0.5 * (x+3)^2), add=TRUE, col=4, lwd=2, lty=2) | |
| lines(rep(s_samp$x,each=2), c(0, rep(s_samp$y[2:nit], each=2), s_samp$y[nit])) | |
| abline(h=0,lty=2) | |
| # perfect slice sampler (Huber 2016, pg. 57) | |
| x_1 <- 0 # unique maximum value of f_x | |
| x_0 <- 1 # unique minimum | |
| coupledSliceSamp <- function(x_0, x_1, u, u2, f_x, inv_fx) { | |
| y_0 <- u * f_x(x_0) | |
| x_0 <- u2 * inv_fx(y_0) | |
| y_1 <- u * f_x(x_1) | |
| if (f_x(x_0) >= y_1) { | |
| x_1 <- x_0 | |
| } else { | |
| x_1 <- u2 * inv_fx(y_1) | |
| } | |
| list(x_0=x_0, y_0=y_0, x_1=x_1, y_1=y_1) | |
| } | |
| coupledSliceSamp(x_0, x_1, runif(1), runif(1), f_x, inv_fx) | |
| perfectSliceSamp <- function(x_0, x_1, t, f_x, inv_fx) { | |
| u <- runif(t) | |
| u2 <- runif(t) | |
| for (i in 1:t) { | |
| res <- coupledSliceSamp(x_0, x_1, u[i], u2[i], f_x, inv_fx) | |
| x_0 <- res$x_0 | |
| x_1 <- res$x_1 | |
| } | |
| if (x_0 == x_1) { | |
| return(x_1) | |
| } else { | |
| print(paste("T = ", log(t)/log(2) + 1, "; t =", 2*t)) | |
| x_1 <- perfectSliceSamp(x_0, x_1, 2*t, f_x, inv_fx) | |
| # read twice | |
| for (i in t:1) { | |
| res <- coupledSliceSamp(x_1, x_1, u[i], u2[i], f_x, inv_fx) | |
| x_0 <- res$x_0 | |
| x_1 <- res$x_1 | |
| } | |
| return(x_1) | |
| } | |
| } | |
| x_1 <- 0 # unique maximum value of f_x | |
| x_0 <- 1 # unique minimum | |
| perfectSliceSamp(x_0, x_1, 1, f_x, inv_fx) | |
| library(truncnorm) | |
| nit <- 1000 | |
| sam <- numeric(nit) | |
| for (it in 1:nit) { | |
| sam[it] <- perfectSliceSamp(1, 0, 1, f_x, inv_fx) | |
| } | |
| hist(sam, freq=FALSE, main="Truncated Gaussian", xlab="x", ylim=c(0,dtruncnorm(0,0,1,–3)),breaks=20) | |
| curve(dtruncnorm(x,a=0,b=1,mean=–3,sd=1), add=TRUE, col=4, lwd=2, lty=2) | |
| sam2 <- sliceSamp(1000, runif(1), f_x, inv_fx) | |
| hist(sam2$x, freq=FALSE, main="Truncated Gaussian", xlab="x", ylim=c(0,dtruncnorm(0,0,1,–3)),breaks=30) | |
| curve(dtruncnorm(x,a=0,b=1,mean=–3,sd=1), add=TRUE, col=4, lwd=2, lty=2) | |
| range(sam2$x) | |
| which(sam2$x > 1) |