The least absolute shrinkage and selection operator (LASSO) shrinks the regression coefficients by adding a \(l_1-norm\) penalty term to the optimization problem. More information about LASSO can be found in e.g., Tibshirani (1996). For the ordinary least squares problem with \(l_1\) regularization this leads to the following optimization problem \[ \underset{\beta}{\text{minimize}} ~~ \frac{1}{2} || y - X \beta ||_2^2 + \lambda ||\beta||_1 \] with \(X \in \mathbb{R}^{m \times n}\), \(y \in \mathbb{R}^{m}\), \(\beta \in \mathbb{R}^n\) and \(0 < \lambda \in \mathbb{R}\). The problem can be rewritten into a quadratic optimization problem \[ \begin{array}{rl} \underset{(\beta, \gamma, t)}{\text{minimize}} & \frac{1}{2} \gamma^\top \gamma + \lambda \boldsymbol{1}^\top t \\ \text{subject to} & y - X \beta = \gamma \\ & -t \leq \beta \leq t \end{array} \] with \(\gamma \in \mathbb{R}^n\), \(t \in \mathbb{R}^n\)
Alternatively the problem can be written as a second order cone problem \[ \begin{array}{rl} \underset{(\beta, t, u)}{\text{minimize}} & \frac{1}{2} u + \lambda \boldsymbol{1}^\top t \\ \text{subject to} & ||(1 - u, ~ 2 y - 2 X \beta)||_2 \leq 1 + u \\ & -t \leq \beta \leq t \end{array} \] with \(u \in \mathbb{R}\).
In R package glmnet is typically the method of choice to obtain the LASSO estimate.
library("glmnet")
data("QuickStartExample")
1
lambda <-
cbind(x = QuickStartExample[["x"]])
x <- drop(QuickStartExample[["y"]])
y <-
glmnet(x, y, family = "gaussian", alpha = 1, lambda = lambda,
mo <-intercept = FALSE, standardize = FALSE)
setNames(as.vector(coef(mo)), rownames(coef(mo)))
glmnet_beta <-round(glmnet_beta, 4)
## (Intercept) V1 V2 V3 V4 V5
## 0.0000 0.7434 0.0000 0.0000 0.0000 0.0000
## V6 V7 V8 V9 V10 V11
## 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## V12 V13 V14 V15 V16 V17
## 0.0000 0.0000 -0.5943 0.0000 0.0000 0.0000
## V18 V19 V20
## 0.0000 0.0000 0.0000
library(slam)
Sys.setenv(ROI_LOAD_PLUGINS = FALSE)
library(ROI)
library(ROI.plugin.qpoases)
library(ROI.plugin.ecos)
function(...) {
dbind <- function(x, y) {
.dbind <- simple_triplet_zero_matrix(NROW(x), NCOL(y))
A <- simple_triplet_zero_matrix(NROW(y), NCOL(x))
B <-rbind(cbind(x, A), cbind(B, y))
}Reduce(.dbind, list(...))
}
function(x, y, lambda) {
qp_lasso <- simple_triplet_zero_matrix
stzm <- simple_triplet_diag_matrix
stdm <- NROW(x); n <- NCOL(x)
m <- dbind(stzm(n), stdm(1, m), stzm(n))
Q0 <- c(b = double(n), g = double(m), t = lambda * rep(1, n))
a0 <- OP(objective = Q_objective(Q = Q0, L = a0))
op <-## y - X %*% beta = gamma <=> X %*% beta + gamma = y
cbind(x, stdm(1, m), stzm(m, n))
A1 <- L_constraint(A1, eq(m), y)
LC1 <-## -t <= beta <=> 0 <= beta + t
cbind(stdm(1, n), stzm(n, m), stdm(1, n))
A2 <- L_constraint(A2, geq(n), double(n))
LC2 <-## beta <= t <=> beta - t <= 0
cbind(stdm(1, n), stzm(n, m), stdm(-1, n))
A3 <- L_constraint(A3, leq(n), double(n))
LC3 <-constraints(op) <- rbind(LC1, LC2, LC3)
bounds(op) <- V_bound(ld = -Inf, nobj = ncol(Q0))
op
}
qp_lasso(x, y, 0)
op <- ROI_solve(op, "qpoases")) (qp0 <-
## Optimal solution found.
## The objective value is: 3.740948e+01
qp_lasso(x, y, lambda * NROW(x))
op <- ROI_solve(op, "qpoases")) (qp1 <-
## Optimal solution found.
## The objective value is: 3.955956e+02
function(x, y, lambda) {
cp_lasso <- simple_triplet_matrix
stm <- simple_triplet_zero_matrix
stzm <- simple_triplet_diag_matrix
stdm <- NROW(x); n <- NCOL(x); nobj <- 2 * n + 1
m <- OP(c(beta = double(n), t = lambda * rep(1, n), u = 0.5))
op <-## ||(1 - u, 2 y - 2 X %*% beta)||_2 <= 1 + u
rbind(stm(1, nobj, -1), stm(1, nobj, 1), cbind(2 * x, stzm(m, n + 1)))
A1 <- C_constraint(A1, K_soc(m + 2), rhs = c(1, 1, 2 * y))
LC1 <-## -t <= z <=> 0 <= z + t
cbind(stdm(1, n), stdm(1, n), stzm(n, 1))
A2 <-## z <= t <=> z - t <= 0
cbind(stdm(1, n), stdm(-1, n), stzm(n, 1))
A3 <- L_constraint(rbind(-A2, A3), leq(2 * n), double(2 * n))
LC2 <-constraints(op) <- rbind(LC2, LC1)
bounds(op) <- V_bound(ld = -Inf, nobj = nobj)
op
}
cp_lasso(x, y, 0)
op <- ROI_solve(op, "ecos")) (cp0 <-
## Optimal solution found.
## The objective value is: 3.740947e+01
cp_lasso(x, y, lambda * NROW(x))
op <- ROI_solve(op, "ecos")) (cp1 <-
## Optimal solution found.
## The objective value is: 3.955956e+02
ncol(x)
n <-cbind(lm = coef(lm.fit(x, y)), qp = head(solution(qp0), n),
cp = head(solution(cp0), n))
## lm qp cp
## x1 1.408632566 1.408632566 1.408654479
## x2 0.030929816 0.030929816 0.030918400
## x3 0.771681001 0.771681001 0.771719349
## x4 0.089075024 0.089075024 0.089089244
## x5 -0.911285048 -0.911285048 -0.911326234
## x6 0.606341963 0.606341963 0.606354439
## x7 0.125746415 0.125746415 0.125746263
## x8 0.398702698 0.398702698 0.398716974
## x9 -0.047255704 -0.047255704 -0.047251243
## x10 0.150093482 0.150093482 0.150111930
## x11 0.220645029 0.220645029 0.220664537
## x12 -0.081151968 -0.081151968 -0.081156941
## x13 -0.054793670 -0.054793670 -0.054799131
## x14 -1.179567856 -1.179567856 -1.179595088
## x15 -0.165550295 -0.165550295 -0.165565041
## x16 -0.042037759 -0.042037759 -0.042050631
## x17 -0.053693736 -0.053693736 -0.053705524
## x18 0.026055220 0.026055220 0.026061204
## x19 0.003846485 0.003846485 0.003842586
## x20 -1.144855738 -1.144855738 -1.144916814
cbind(lm = round(glmnet_beta, 4), qp = round(c(0, head(solution(qp1), n)), 4),
cp = round(c(0, head(solution(cp1), n)), 4))
## lm qp cp
## (Intercept) 0.0000 0.0000 0.0000
## V1 0.7434 0.7434 0.7434
## V2 0.0000 0.0000 0.0000
## V3 0.0000 0.0000 0.0000
## V4 0.0000 0.0000 0.0000
## V5 0.0000 0.0000 0.0000
## V6 0.0000 0.0000 0.0000
## V7 0.0000 0.0000 0.0000
## V8 0.0000 0.0000 0.0000
## V9 0.0000 0.0000 0.0000
## V10 0.0000 0.0000 0.0000
## V11 0.0000 0.0000 0.0000
## V12 0.0000 0.0000 0.0000
## V13 0.0000 0.0000 0.0000
## V14 -0.5943 -0.5943 -0.5942
## V15 0.0000 0.0000 0.0000
## V16 0.0000 0.0000 0.0000
## V17 0.0000 0.0000 0.0000
## V18 0.0000 0.0000 0.0000
## V19 0.0000 0.0000 0.0000
## V20 0.0000 0.0000 0.0000
doi:10.1111/j.2517-6161.1996.tb02080.x