
The least absolute shrinkage and selection operator (LASSO) shrinks the regression coefficients by adding an \ell_1-norm penalty term to the optimization problem. More information about the LASSO can be found in, e.g., Tibshirani (1996). For the ordinary least squares problem with \ell_1 regularization this leads to the following optimization problem \begin{array}{rl} \underset{\beta}{\text{minimize}} & \frac{1}{2} ||y - X \beta||_2^2 + \lambda ||\beta||_1 \end{array} with X \in \mathbb{R}^{m \times n}, y \in \mathbb{R}^m, \beta \in \mathbb{R}^n and 0 < \lambda \in \mathbb{R}. The problem can be rewritten into a quadratic optimization problem \begin{array}{rl} \underset{(\beta, \gamma, t)}{\text{minimize}} & \frac{1}{2} \gamma^\top \gamma + \lambda \boldsymbol{1}^\top t \\ \text{subject to} & y - X \beta = \gamma \\ & -t \leq \beta \leq t \end{array} with \gamma \in \mathbb{R}^m and t \in \mathbb{R}^n.
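To see the equivalence, note that -t \leq \beta \leq t is the same as |\beta_i| \leq t_i for every component. Since t enters the objective only through the minimized term \lambda \boldsymbol{1}^\top t, each bound is tight at the optimum, i.e. t_i = |\beta_i|, and hence \lambda \boldsymbol{1}^\top t = \lambda ||\beta||_1.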

Alternatively, the problem can be written as a second-order cone problem \begin{array}{rl} \underset{(\beta, t, u)}{\text{minimize}} & \frac{1}{2} u + \lambda \boldsymbol{1}^\top t \\ \text{subject to} & ||(1 - u, ~ 2 y - 2 X \beta)||_2 \leq 1 + u \\ & -t \leq \beta \leq t \end{array} with u \in \mathbb{R}.
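The cone constraint encodes the epigraph of the squared residual norm: squaring both sides of ||(1 - u, ~ 2 y - 2 X \beta)||_2 \leq 1 + u gives (1 - u)^2 + 4 ||y - X \beta||_2^2 \leq (1 + u)^2, which simplifies to ||y - X \beta||_2^2 \leq u. Minimizing \frac{1}{2} u therefore minimizes \frac{1}{2} ||y - X \beta||_2^2.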


In R, the package glmnet is typically the method of choice for obtaining the LASSO estimate.

library("glmnet")
data("QuickStartExample")
lambda <- 1

x <- cbind(x = QuickStartExample[["x"]])
y <- drop(QuickStartExample[["y"]])

mo <- glmnet(x, y, family = "gaussian", alpha = 1, lambda = lambda,
             intercept = FALSE, standardize = FALSE)
glmnet_beta <- setNames(as.vector(coef(mo)), rownames(coef(mo)))
round(glmnet_beta, 4)
## (Intercept)          V1          V2          V3          V4          V5 
##      0.0000      0.7434      0.0000      0.0000      0.0000      0.0000 
##          V6          V7          V8          V9         V10         V11 
##      0.0000      0.0000      0.0000      0.0000      0.0000      0.0000 
##         V12         V13         V14         V15         V16         V17 
##      0.0000      0.0000     -0.5943      0.0000      0.0000      0.0000 
##         V18         V19         V20 
##      0.0000      0.0000      0.0000
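Note that for family = "gaussian" glmnet minimizes \frac{1}{2 m} ||y - X \beta||_2^2 + \lambda ||\beta||_1, i.e., the least-squares term is scaled by the number of observations m. To reproduce the glmnet estimates with the formulations stated above, the penalty parameter therefore has to be multiplied by m = NROW(x), as done in the solver calls below.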

Estimation QP

library(slam)
Sys.setenv(ROI_LOAD_PLUGINS = FALSE)
library(ROI)
library(ROI.plugin.qpoases)
library(ROI.plugin.ecos)

dbind <- function(...) {
    .dbind <- function(x, y) {
        A <- simple_triplet_zero_matrix(NROW(x), NCOL(y))
        B <- simple_triplet_zero_matrix(NROW(y), NCOL(x))
        rbind(cbind(x, A), cbind(B, y))
    }
    Reduce(.dbind, list(...))
}
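
## illustrative sanity check (not part of the original workflow):
## dbind() stacks its arguments block-diagonally, so a 2 x 2 and a
## 3 x 3 diagonal matrix combine into a 5 x 5 matrix with the two
## blocks on the diagonal and zeros elsewhere
a <- simple_triplet_diag_matrix(1, 2)
b <- simple_triplet_diag_matrix(2, 3)
stopifnot(all(dim(dbind(a, b)) == c(5, 5)))
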
qp_lasso <- function(x, y, lambda) {
    stzm <- simple_triplet_zero_matrix
    stdm <- simple_triplet_diag_matrix
    m <- NROW(x); n <- NCOL(x)
    Q0 <- dbind(stzm(n), stdm(1, m), stzm(n))
    a0 <- c(b = double(n), g = double(m), t = lambda * rep(1, n))
    op <- OP(objective = Q_objective(Q = Q0, L = a0))
    ## y - X %*% beta = gamma  <=>  X %*% beta + gamma = y
    A1 <- cbind(x, stdm(1, m), stzm(m, n))
    LC1 <- L_constraint(A1, eq(m), y)
    ##  -t <= beta  <=>  0 <= beta + t
    A2 <- cbind(stdm(1, n), stzm(n, m), stdm(1, n))
    LC2 <- L_constraint(A2, geq(n), double(n))
    ##   beta <= t  <=>  beta - t <= 0
    A3 <- cbind(stdm(1, n), stzm(n, m), stdm(-1, n))
    LC3 <- L_constraint(A3, leq(n), double(n))
    constraints(op) <- rbind(LC1, LC2, LC3)
    bounds(op) <- V_bound(ld = -Inf, nobj = ncol(Q0))
    op
}
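
In the QP formulation the decision variables are stacked as (\beta, \gamma, t), so the solution vector has n + m + n elements and the coefficient estimates are the first n entries; they are extracted via head(solution(.), n) in the comparison below.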

op <- qp_lasso(x, y, 0)
(qp0 <- ROI_solve(op, "qpoases"))
## Optimal solution found.
## The objective value is: 3.740948e+01
op <- qp_lasso(x, y, lambda * NROW(x))
(qp1 <- ROI_solve(op, "qpoases"))
## Optimal solution found.
## The objective value is: 3.955956e+02

Estimation CP

cp_lasso <- function(x, y, lambda) {
    stm <- simple_triplet_matrix
    stzm <- simple_triplet_zero_matrix
    stdm <- simple_triplet_diag_matrix
    m <- NROW(x); n <- NCOL(x); nobj <- 2 * n + 1
    op <- OP(c(beta = double(n), t = lambda * rep(1, n), u = 0.5))
    ## ||(1 - u, 2 y - 2 X %*% beta)||_2 <= 1 + u
    A1 <- rbind(stm(1, nobj, -1), stm(1, nobj, 1), cbind(2 * x, stzm(m, n + 1)))
    LC1 <- C_constraint(A1, K_soc(m + 2), rhs = c(1, 1, 2 * y))
    ## -t <= beta  <=>  0 <= beta + t
    A2 <- cbind(stdm(1, n), stdm(1, n), stzm(n, 1))
    ##  beta <= t  <=>  beta - t <= 0
    A3 <- cbind(stdm(1, n), stdm(-1, n), stzm(n, 1))
    LC2 <- L_constraint(rbind(-A2, A3), leq(2 * n), double(2 * n))
    constraints(op) <- rbind(LC2, LC1)
    bounds(op) <- V_bound(ld = -Inf, nobj = nobj)
    op
}
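
In the conic formulation the decision variables are stacked as (\beta, t, u), i.e. nobj = 2 n + 1, and the coefficient estimates are again the first n entries of the solution vector. Which of the installed solvers can handle a given problem can be queried with ROI_applicable_solvers(); a minimal sketch (the result depends on which ROI plugins are installed locally):

## list the registered solvers applicable to this conic problem;
## the output depends on the locally installed ROI plugins
ROI_applicable_solvers(cp_lasso(x, y, 1))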

op <- cp_lasso(x, y, 0)
(cp0 <- ROI_solve(op, "ecos"))
## Optimal solution found.
## The objective value is: 3.740947e+01
op <- cp_lasso(x, y, lambda * NROW(x))
(cp1 <- ROI_solve(op, "ecos"))
## Optimal solution found.
## The objective value is: 3.955956e+02

Comparison

n <- ncol(x)
cbind(lm = coef(lm.fit(x, y)), qp = head(solution(qp0), n), 
      cp = head(solution(cp0), n))
##               lm           qp           cp
## x1   1.408632566  1.408632566  1.408654479
## x2   0.030929816  0.030929816  0.030918400
## x3   0.771681001  0.771681001  0.771719349
## x4   0.089075024  0.089075024  0.089089244
## x5  -0.911285048 -0.911285048 -0.911326234
## x6   0.606341963  0.606341963  0.606354439
## x7   0.125746415  0.125746415  0.125746263
## x8   0.398702698  0.398702698  0.398716974
## x9  -0.047255704 -0.047255704 -0.047251243
## x10  0.150093482  0.150093482  0.150111930
## x11  0.220645029  0.220645029  0.220664537
## x12 -0.081151968 -0.081151968 -0.081156941
## x13 -0.054793670 -0.054793670 -0.054799131
## x14 -1.179567856 -1.179567856 -1.179595088
## x15 -0.165550295 -0.165550295 -0.165565041
## x16 -0.042037759 -0.042037759 -0.042050631
## x17 -0.053693736 -0.053693736 -0.053705524
## x18  0.026055220  0.026055220  0.026061204
## x19  0.003846485  0.003846485  0.003842586
## x20 -1.144855738 -1.144855738 -1.144916814
cbind(glmnet = round(glmnet_beta, 4), qp = round(c(0, head(solution(qp1), n)), 4), 
      cp = round(c(0, head(solution(cp1), n)), 4))
##              glmnet      qp      cp
## (Intercept)  0.0000  0.0000  0.0000
## V1           0.7434  0.7434  0.7434
## V2           0.0000  0.0000  0.0000
## V3           0.0000  0.0000  0.0000
## V4           0.0000  0.0000  0.0000
## V5           0.0000  0.0000  0.0000
## V6           0.0000  0.0000  0.0000
## V7           0.0000  0.0000  0.0000
## V8           0.0000  0.0000  0.0000
## V9           0.0000  0.0000  0.0000
## V10          0.0000  0.0000  0.0000
## V11          0.0000  0.0000  0.0000
## V12          0.0000  0.0000  0.0000
## V13          0.0000  0.0000  0.0000
## V14         -0.5943 -0.5943 -0.5942
## V15          0.0000  0.0000  0.0000
## V16          0.0000  0.0000  0.0000
## V17          0.0000  0.0000  0.0000
## V18          0.0000  0.0000  0.0000
## V19          0.0000  0.0000  0.0000
## V20          0.0000  0.0000  0.0000
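
Both reformulations reproduce the glmnet estimate; the remaining deviation in the conic solution (e.g., -0.5942 instead of -0.5943 for V14) is within the termination tolerance of the interior-point solver ECOS.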

References

  • Tibshirani, Robert (1996). Regression Shrinkage and Selection via the Lasso. Journal of the Royal Statistical Society: Series B (Methodological), 58(1), 267–288. doi:10.1111/j.2517-6161.1996.tb02080.x