The least absolute shrinkage and selection operator (LASSO) shrinks the regression coefficients by adding a l1−norm penalty term to the optimization problem. More information about LASSO can be found in e.g., Tibshirani (1996). For the ordinary least squares problem with l1 regularization this leads to the following optimization problem minimizeβ 12||y−Xβ||22+λ||β||1 with X∈Rm×n, y∈Rm, β∈Rn and 0<λ∈R. The problem can be rewritten into a quadratic optimization problem \begin{array}{rl} \underset{(\beta, \gamma, t)}{\text{minimize}} & \frac{1}{2} \gamma^\top \gamma + \lambda \boldsymbol{1}^\top t \\ \text{subject to} & y - X \beta = \gamma \\ & -t \leq \beta \leq t \end{array} with \gamma \in \mathbb{R}^n, t \in \mathbb{R}^n
Alternatively the problem can be written as a second order cone problem \begin{array}{rl} \underset{(\beta, t, u)}{\text{minimize}} & \frac{1}{2} u + \lambda \boldsymbol{1}^\top t \\ \text{subject to} & ||(1 - u, ~ 2 y - 2 X \beta)||_2 \leq 1 + u \\ & -t \leq \beta \leq t \end{array} with u \in \mathbb{R}.
In R package glmnet is typically the method of choice to obtain the LASSO estimate.
library("glmnet")
data("QuickStartExample")
1
lambda <-
cbind(x = QuickStartExample[["x"]])
x <- drop(QuickStartExample[["y"]])
y <-
glmnet(x, y, family = "gaussian", alpha = 1, lambda = lambda,
mo <-intercept = FALSE, standardize = FALSE)
setNames(as.vector(coef(mo)), rownames(coef(mo)))
glmnet_beta <-round(glmnet_beta, 4)
## (Intercept) V1 V2 V3 V4 V5
## 0.0000 0.7434 0.0000 0.0000 0.0000 0.0000
## V6 V7 V8 V9 V10 V11
## 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## V12 V13 V14 V15 V16 V17
## 0.0000 0.0000 -0.5943 0.0000 0.0000 0.0000
## V18 V19 V20
## 0.0000 0.0000 0.0000
library(slam)
Sys.setenv(ROI_LOAD_PLUGINS = FALSE)
library(ROI)
library(ROI.plugin.qpoases)
library(ROI.plugin.ecos)
function(...) {
dbind <- function(x, y) {
.dbind <- simple_triplet_zero_matrix(NROW(x), NCOL(y))
A <- simple_triplet_zero_matrix(NROW(y), NCOL(x))
B <-rbind(cbind(x, A), cbind(B, y))
}Reduce(.dbind, list(...))
}
function(x, y, lambda) {
qp_lasso <- simple_triplet_zero_matrix
stzm <- simple_triplet_diag_matrix
stdm <- NROW(x); n <- NCOL(x)
m <- dbind(stzm(n), stdm(1, m), stzm(n))
Q0 <- c(b = double(n), g = double(m), t = lambda * rep(1, n))
a0 <- OP(objective = Q_objective(Q = Q0, L = a0))
op <-## y - X %*% beta = gamma <=> X %*% beta + gamma = y
cbind(x, stdm(1, m), stzm(m, n))
A1 <- L_constraint(A1, eq(m), y)
LC1 <-## -t <= beta <=> 0 <= beta + t
cbind(stdm(1, n), stzm(n, m), stdm(1, n))
A2 <- L_constraint(A2, geq(n), double(n))
LC2 <-## beta <= t <=> beta - t <= 0
cbind(stdm(1, n), stzm(n, m), stdm(-1, n))
A3 <- L_constraint(A3, leq(n), double(n))
LC3 <-constraints(op) <- rbind(LC1, LC2, LC3)
bounds(op) <- V_bound(ld = -Inf, nobj = ncol(Q0))
op
}
qp_lasso(x, y, 0)
op <- ROI_solve(op, "qpoases")) (qp0 <-
## Optimal solution found.
## The objective value is: 3.740948e+01
qp_lasso(x, y, lambda * NROW(x))
op <- ROI_solve(op, "qpoases")) (qp1 <-
## Optimal solution found.
## The objective value is: 3.955956e+02
function(x, y, lambda) {
cp_lasso <- simple_triplet_matrix
stm <- simple_triplet_zero_matrix
stzm <- simple_triplet_diag_matrix
stdm <- NROW(x); n <- NCOL(x); nobj <- 2 * n + 1
m <- OP(c(beta = double(n), t = lambda * rep(1, n), u = 0.5))
op <-## ||(1 - u, 2 y - 2 X %*% beta)||_2 <= 1 + u
rbind(stm(1, nobj, -1), stm(1, nobj, 1), cbind(2 * x, stzm(m, n + 1)))
A1 <- C_constraint(A1, K_soc(m + 2), rhs = c(1, 1, 2 * y))
LC1 <-## -t <= z <=> 0 <= z + t
cbind(stdm(1, n), stdm(1, n), stzm(n, 1))
A2 <-## z <= t <=> z - t <= 0
cbind(stdm(1, n), stdm(-1, n), stzm(n, 1))
A3 <- L_constraint(rbind(-A2, A3), leq(2 * n), double(2 * n))
LC2 <-constraints(op) <- rbind(LC2, LC1)
bounds(op) <- V_bound(ld = -Inf, nobj = nobj)
op
}
cp_lasso(x, y, 0)
op <- ROI_solve(op, "ecos")) (cp0 <-
## Optimal solution found.
## The objective value is: 3.740947e+01
cp_lasso(x, y, lambda * NROW(x))
op <- ROI_solve(op, "ecos")) (cp1 <-
## Optimal solution found.
## The objective value is: 3.955956e+02
ncol(x)
n <-cbind(lm = coef(lm.fit(x, y)), qp = head(solution(qp0), n),
cp = head(solution(cp0), n))
## lm qp cp
## x1 1.408632566 1.408632566 1.408654479
## x2 0.030929816 0.030929816 0.030918400
## x3 0.771681001 0.771681001 0.771719349
## x4 0.089075024 0.089075024 0.089089244
## x5 -0.911285048 -0.911285048 -0.911326234
## x6 0.606341963 0.606341963 0.606354439
## x7 0.125746415 0.125746415 0.125746263
## x8 0.398702698 0.398702698 0.398716974
## x9 -0.047255704 -0.047255704 -0.047251243
## x10 0.150093482 0.150093482 0.150111930
## x11 0.220645029 0.220645029 0.220664537
## x12 -0.081151968 -0.081151968 -0.081156941
## x13 -0.054793670 -0.054793670 -0.054799131
## x14 -1.179567856 -1.179567856 -1.179595088
## x15 -0.165550295 -0.165550295 -0.165565041
## x16 -0.042037759 -0.042037759 -0.042050631
## x17 -0.053693736 -0.053693736 -0.053705524
## x18 0.026055220 0.026055220 0.026061204
## x19 0.003846485 0.003846485 0.003842586
## x20 -1.144855738 -1.144855738 -1.144916814
cbind(lm = round(glmnet_beta, 4), qp = round(c(0, head(solution(qp1), n)), 4),
cp = round(c(0, head(solution(cp1), n)), 4))
## lm qp cp
## (Intercept) 0.0000 0.0000 0.0000
## V1 0.7434 0.7434 0.7434
## V2 0.0000 0.0000 0.0000
## V3 0.0000 0.0000 0.0000
## V4 0.0000 0.0000 0.0000
## V5 0.0000 0.0000 0.0000
## V6 0.0000 0.0000 0.0000
## V7 0.0000 0.0000 0.0000
## V8 0.0000 0.0000 0.0000
## V9 0.0000 0.0000 0.0000
## V10 0.0000 0.0000 0.0000
## V11 0.0000 0.0000 0.0000
## V12 0.0000 0.0000 0.0000
## V13 0.0000 0.0000 0.0000
## V14 -0.5943 -0.5943 -0.5942
## V15 0.0000 0.0000 0.0000
## V16 0.0000 0.0000 0.0000
## V17 0.0000 0.0000 0.0000
## V18 0.0000 0.0000 0.0000
## V19 0.0000 0.0000 0.0000
## V20 0.0000 0.0000 0.0000
doi:10.1111/j.2517-6161.1996.tb02080.x