The least absolute shrinkage and selection operator (LASSO) shrinks the regression coefficients by adding a \(l_1-norm\) penalty term to the optimization problem. More information about LASSO can be found in e.g., Tibshirani (1996). For the ordinary least squares problem with \(l_1\) regularization this leads to the following optimization problem \[ \underset{\beta}{\text{minimize}} ~~ \frac{1}{2} || y - X \beta ||_2^2 + \lambda ||\beta||_1 \] with \(X \in \mathbb{R}^{m \times n}\), \(y \in \mathbb{R}^{m}\), \(\beta \in \mathbb{R}^n\) and \(0 < \lambda \in \mathbb{R}\). The problem can be rewritten into a quadratic optimization problem \[ \begin{array}{rl} \underset{(\beta, \gamma, t)}{\text{minimize}} & \frac{1}{2} \gamma^\top \gamma + \lambda \boldsymbol{1}^\top t \\ \text{subject to} & y - X \beta = \gamma \\ & -t \leq \beta \leq t \end{array} \] with \(\gamma \in \mathbb{R}^n\), \(t \in \mathbb{R}^n\)

Alternatively the problem can be written as a second order cone problem \[ \begin{array}{rl} \underset{(\beta, t, u)}{\text{minimize}} & \frac{1}{2} u + \lambda \boldsymbol{1}^\top t \\ \text{subject to} & ||(1 - u, ~ 2 y - 2 X \beta)||_2 \leq 1 + u \\ & -t \leq \beta \leq t \end{array} \] with \(u \in \mathbb{R}\).


In R package glmnet is typically the method of choice to obtain the LASSO estimate.

library(glmnet)
data(QuickStartExample)
lambda <- 1
mo <- glmnet(x, y, family = "gaussian", alpha = 1, lambda = lambda, 
    intercept = FALSE, standardize = FALSE)
glmnet_beta <- setNames(as.vector(coef(mo)), rownames(coef(mo)))
round(glmnet_beta, 4)
## (Intercept)          V1          V2          V3          V4          V5 
##      0.0000      0.7434      0.0000      0.0000      0.0000      0.0000 
##          V6          V7          V8          V9         V10         V11 
##      0.0000      0.0000      0.0000      0.0000      0.0000      0.0000 
##         V12         V13         V14         V15         V16         V17 
##      0.0000      0.0000     -0.5943      0.0000      0.0000      0.0000 
##         V18         V19         V20 
##      0.0000      0.0000      0.0000

Estimation QP

library(slam)
Sys.setenv(ROI_LOAD_PLUGINS = FALSE)
library(ROI)
library(ROI.plugin.qpoases)
library(ROI.plugin.ecos)

dbind <- function(...) {
    .dbind <- function(x, y) {
        A <- simple_triplet_zero_matrix(NROW(x), NCOL(y))
        B <- simple_triplet_zero_matrix(NROW(y), NCOL(x))
        rbind(cbind(x, A), cbind(B, y))
    }
    Reduce(.dbind, list(...))
}
qp_lasso <- function(x, y, lambda) {
    stzm <- simple_triplet_zero_matrix
    stdm <- simple_triplet_diag_matrix
    m <- NROW(x); n <- NCOL(x)
    Q0 <- dbind(stzm(n), stdm(1, m), stzm(n))
    a0 <- c(b = double(n), g = double(m), t = lambda * rep(1, n))
    op <- OP(objective = Q_objective(Q = Q0, L = a0))
    ## y - X %*% beta = gamma  <=>  X %*% beta + gamma = y
    A1 <- cbind(x, stdm(1, m), stzm(m, n))
    LC1 <- L_constraint(A1, eq(m), y)
    ##  -t <= beta  <=>  0 <= beta + t
    A2 <- cbind(stdm(1, n), stzm(n, m), stdm(1, n))
    LC2 <- L_constraint(A2, geq(n), double(n))
    ##   beta <= t  <=>  beta - t <= 0
    A3 <- cbind(stdm(1, n), stzm(n, m), stdm(-1, n))
    LC3 <- L_constraint(A3, leq(n), double(n))
    constraints(op) <- rbind(LC1, LC2, LC3)
    bounds(op) <- V_bound(ld = -Inf, nobj = ncol(Q0))
    op
}

op <- qp_lasso(x, y, 0)
(qp0 <- ROI_solve(op, "qpoases"))
## Optimal solution found.
## The objective value is: 3.740948e+01
op <- qp_lasso(x, y, lambda * NROW(x))
(qp1 <- ROI_solve(op, "qpoases"))
## Optimal solution found.
## The objective value is: 3.955956e+02

Estimation CP

cp_lasso <- function(x, y, lambda) {
    stm <- simple_triplet_matrix
    stzm <- simple_triplet_zero_matrix
    stdm <- simple_triplet_diag_matrix
    m <- NROW(x); n <- NCOL(x); nobj <- 2 * n + 1
    op <- OP(c(beta = double(n), t = lambda * rep(1, n), u = 0.5))
    ## ||(1 - u, 2 y - 2 X %*% beta)||_2 <= 1 + u
    A1 <- rbind(stm(1, nobj, -1), stm(1, nobj, 1), cbind(2 * x, stzm(m, n + 1)))
    LC1 <- C_constraint(A1, K_soc(m + 2), rhs = c(1, 1, 2 * y))
    ## -t <= z  <=>  0 <= z + t
    A2 <- cbind(stdm(1, n), stdm(1, n), stzm(n, 1))
    ##  z <= t  <=>  z - t <= 0
    A3 <- cbind(stdm(1, n), stdm(-1, n), stzm(n, 1))
    LC2 <- L_constraint(rbind(-A2, A3), leq(2 * n), double(2 * n))
    constraints(op) <- rbind(LC2, LC1)
    bounds(op) <- V_bound(ld = -Inf, nobj = nobj)
    op
}

op <- cp_lasso(x, y, 0)
(cp0 <- ROI_solve(op, "ecos"))
## Optimal solution found.
## The objective value is: 3.740947e+01
op <- cp_lasso(x, y, lambda * NROW(x))
(cp1 <- ROI_solve(op, "ecos"))
## Optimal solution found.
## The objective value is: 3.955956e+02

Comparison

n <- ncol(x)
cbind(lm = coef(lm.fit(x, y)), qp = head(solution(qp0), n), 
      cp = head(solution(cp0), n))
##               lm           qp           cp
## x1   1.408632566  1.408632566  1.408654479
## x2   0.030929816  0.030929816  0.030918400
## x3   0.771681001  0.771681001  0.771719349
## x4   0.089075024  0.089075024  0.089089244
## x5  -0.911285048 -0.911285048 -0.911326234
## x6   0.606341963  0.606341963  0.606354439
## x7   0.125746415  0.125746415  0.125746263
## x8   0.398702698  0.398702698  0.398716974
## x9  -0.047255704 -0.047255704 -0.047251243
## x10  0.150093482  0.150093482  0.150111930
## x11  0.220645029  0.220645029  0.220664537
## x12 -0.081151968 -0.081151968 -0.081156941
## x13 -0.054793670 -0.054793670 -0.054799131
## x14 -1.179567856 -1.179567856 -1.179595088
## x15 -0.165550295 -0.165550295 -0.165565041
## x16 -0.042037759 -0.042037759 -0.042050631
## x17 -0.053693736 -0.053693736 -0.053705524
## x18  0.026055220  0.026055220  0.026061204
## x19  0.003846485  0.003846485  0.003842586
## x20 -1.144855738 -1.144855738 -1.144916814
cbind(lm = round(glmnet_beta, 4), qp = round(c(0, head(solution(qp1), n)), 4), 
      cp = round(c(0, head(solution(cp1), n)), 4))
##                  lm      qp      cp
## (Intercept)  0.0000  0.0000  0.0000
## V1           0.7434  0.7434  0.7434
## V2           0.0000  0.0000  0.0000
## V3           0.0000  0.0000  0.0000
## V4           0.0000  0.0000  0.0000
## V5           0.0000  0.0000  0.0000
## V6           0.0000  0.0000  0.0000
## V7           0.0000  0.0000  0.0000
## V8           0.0000  0.0000  0.0000
## V9           0.0000  0.0000  0.0000
## V10          0.0000  0.0000  0.0000
## V11          0.0000  0.0000  0.0000
## V12          0.0000  0.0000  0.0000
## V13          0.0000  0.0000  0.0000
## V14         -0.5943 -0.5943 -0.5942
## V15          0.0000  0.0000  0.0000
## V16          0.0000  0.0000  0.0000
## V17          0.0000  0.0000  0.0000
## V18          0.0000  0.0000  0.0000
## V19          0.0000  0.0000  0.0000
## V20          0.0000  0.0000  0.0000

References

  • Tibshirani, Robert (1996), Regression Shrinkage and Selection Via the Lasso. Journal of the Royal Statistical Society: Series B (Methodological), 58: 267-288. URL doi:10.1111/j.2517-6161.1996.tb02080.x