\[ \newcommand{\argmin}{\mathop{\mathrm{argmin}}} \newcommand{\argmax}{\mathop{\mathrm{argmax}}} \newcommand{\minimize}{\mathop{\mathrm{minimize}}} \newcommand{\st}{\mathop{\mathrm{subject\,\,to}}} \]
Consider the problem \[ \minimize_{\beta_0,\beta} \; \sum_{i=1}^n \psi_\tau(y_i - \beta_0 - x_i^T \beta) + \lambda \|\beta\|_1 \] where \[ \psi_\tau(v) = \max\{\tau v, (\tau-1) v)\}, \] often called the “pinball” or “tilted \(\ell_1\)” loss, for a quantile level \(\tau \in (0,1)\). We can rewrite this succintly letting \(y\) be the response vector with elements \(y_i\), and \(X\) the predictor matrix with rows \(x_i\), as \[ \minimize_\beta \; \psi_\tau(y-X\beta) + \lambda \|\beta\|_1. \] Here we interpret \(\psi_\tau\) as being applied componentwise to \(y-X\beta\), and we suppress the notation for the intercept; as usual, to include an intercept, we just append a columns of 1s to \(X\).
We can reformulate this as an LP: \[\begin{alignat*}{2} &\minimize_{\beta,u,v} \quad && 1_n^\top v + \lambda 1_p^\top u\\ &\st \quad && v \geq \tau (y-X\beta), \\ &&& v \geq (\tau-1) (y-X\beta), \\ &&& u \geq \beta, \; u \geq -\beta. \end{alignat*}\] Here \(1_k\) is the all 1s vector of dimension \(k\), and all inequalities are interpreted componentwise.
We run a bunch of simple tests against rqPen
.
library(rqPen)
## Warning: package 'rqPen' was built under R version 3.6.2
## Loading required package: quantreg
## Warning: package 'quantreg' was built under R version 3.6.2
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
## Loading required package: regpro
## Loading required package: denpro
library(quantgen)
set.seed(33)
n = 500
p = 50
x = matrix(rnorm(n*p), n, p)
x0 = matrix(rnorm(n*p), n,)
mu = function(x) x[1] + x[2]
y = apply(x, 1, mu) + rnorm(n)
tau = 0.8
lambda = sqrt(get_lambda_max(x, y, Matrix::Diagonal(p)))
# No intercept, no standardize
a1 = quantile_lasso(x, y, tau, lambda, intercept=FALSE, standardize=FALSE, lp_solver="gurobi")
a2 = quantile_lasso(x, y, tau, lambda, intercept=FALSE, standardize=FALSE, lp_solver="glpk")
a3 = rq.lasso.fit(x, y, tau, lambda/n, intercept=FALSE, scalex=FALSE)
max(abs(coef(a1)-coef(a3)))
## [1] 6.661338e-16
max(abs(coef(a2)-coef(a3)))
## [1] 5.199834e-16
max(abs(predict(a1,x0) - predict(a3,x0)))
## [1] 4.440892e-15
max(abs(predict(a2,x0) - predict(a3,x0)))
## [1] 4.218847e-15
# Yes intercept, no standardize
a1 = quantile_lasso(x, y, tau, lambda, intercept=TRUE, standardize=FALSE, lp_solver="gurobi")
a2 = quantile_lasso(x, y, tau, lambda, intercept=TRUE, standardize=FALSE, lp_solver="glpk")
a3 = rq.lasso.fit(x, y, tau, lambda/n, intercept=TRUE, scalex=FALSE)
max(abs(coef(a1)-coef(a3)))
## [1] 4.440892e-16
max(abs(coef(a2)-coef(a3)))
## [1] 5.828671e-16
max(abs(predict(a1,x0) - predict(a3,x0)))
## [1] 3.552714e-15
max(abs(predict(a2,x0) - predict(a3,x0)))
## [1] 3.108624e-15
# No intercept, yes standardize
a1 = quantile_lasso(x, y, tau, lambda, intercept=FALSE, standardize=TRUE, lp_solver="gurobi")
a2 = quantile_lasso(x, y, tau, lambda, intercept=FALSE, standardize=TRUE, lp_solver="glpk")
a3 = rq.lasso.fit(x, y, tau, lambda/n, intercept=FALSE, scalex=TRUE)
max(abs(coef(a1)[-1]-coef(a3)))
## [1] 1.776357e-15
max(abs(coef(a2)[-1]-coef(a3)))
## [1] 3.213325e-15
max(abs(predict(a1,x0) - predict(a3,x0)))
## [1] 0.03196446
max(abs(predict(a2,x0) - predict(a3,x0)))
## [1] 0.03196446
# I think that rqPen is forgetting to add the result of centering the x's back into the intercept
# Yes intercept, yes standardize
a1 = quantile_lasso(x, y, tau, lambda, intercept=TRUE, standardize=TRUE, lp_solver="gurobi")
a2 = quantile_lasso(x, y, tau, lambda, intercept=TRUE, standardize=TRUE, lp_solver="glpk")
a3 = rq.lasso.fit(x, y, tau, lambda/n, intercept=TRUE, scalex=TRUE)
max(abs(coef(a1)[-1]-coef(a3)[-1]))
## [1] 9.298118e-16
max(abs(coef(a2)[-1]-coef(a3)[-1]))
## [1] 1.110223e-15
max(abs(predict(a1,x0) - predict(a3,x0)))
## [1] 0.0003127317
max(abs(predict(a2,x0) - predict(a3,x0)))
## [1] 0.0003127317
# I think that rqPen is forgetting to add the result of centering the x's back into the intercept
On a bigger problem, we compare speeds across LP solvers (Gurobi and GLPK), and also rqPen
.
n = 2000
p = 1000
x = matrix(rnorm(n*p), n, p)
mu = function(x) x[1] + x[2]
y = apply(x, 1, mu) + rnorm(n)
tau = 0.8
lambda = sqrt(get_lambda_max(x, y, Matrix::Diagonal(p)))
system.time(quantile_lasso(x, y, tau, lambda, intercept=FALSE, standardize=FALSE, lp_solver="gurobi", verbose=TRUE))[3]
## Gurobi Optimizer version 9.0.2 build v9.0.2rc0 (mac64)
## Optimize a model with 6000 rows, 4000 columns and 4008000 nonzeros
## Model fingerprint: 0xaedbc93e
## Coefficient statistics:
## Matrix range [8e-09, 4e+00]
## Objective range [1e+00, 2e+01]
## Bounds range [0e+00, 0e+00]
## RHS range [5e-05, 5e+00]
##
## Concurrent LP optimizer: dual simplex and barrier
## Showing barrier log only...
##
## Presolve time: 1.87s
## Presolved: 6000 rows, 4000 columns, 4008000 nonzeros
##
## Ordering time: 0.39s
##
## Barrier statistics:
## Free vars : 1000
## AA' NZ : 1.600e+07
## Factor NZ : 1.800e+07 (roughly 150 MBytes of memory)
## Factor Ops : 7.202e+10 (roughly 1 second per iteration)
## Threads : 3
##
## Objective Residual
## Iter Primal Dual Primal Dual Compl Time
## 0 6.85817877e+04 0.00000000e+00 5.63e-01 0.00e+00 2.14e+01 21s
## 1 1.78096393e+04 9.49507874e+01 1.05e-01 4.43e-01 2.79e+00 22s
## 2 3.44405637e+03 1.59043062e+02 2.01e-03 4.00e-07 3.67e-01 24s
## 3 1.10149986e+03 4.59573707e+02 0.00e+00 3.55e-07 7.15e-02 25s
## 4 8.25932552e+02 7.34515386e+02 0.00e+00 5.40e-08 1.02e-02 27s
## 5 7.79189458e+02 7.66561697e+02 0.00e+00 2.53e-08 1.40e-03 29s
## 6 7.73316411e+02 7.72053902e+02 0.00e+00 7.96e-09 1.40e-04 30s
## 7 7.72756101e+02 7.72606903e+02 0.00e+00 9.48e-10 1.66e-05 32s
## 8 7.72689894e+02 7.72676085e+02 0.00e+00 1.23e-10 1.53e-06 34s
## 9 7.72683892e+02 7.72682719e+02 0.00e+00 1.44e-10 1.30e-07 36s
## 10 7.72683315e+02 7.72683273e+02 0.00e+00 3.49e-11 4.65e-09 38s
## 11 7.72683313e+02 7.72683276e+02 3.49e-08 3.15e-11 4.23e-09 39s
## 12 7.72683311e+02 7.72683278e+02 3.20e-08 2.94e-11 3.65e-09 41s
## 13 7.72683310e+02 7.72683279e+02 5.38e-08 2.94e-11 3.58e-09 42s
## 14 7.72683306e+02 7.72683283e+02 5.99e-08 3.07e-11 2.61e-09 44s
## 15 7.72683305e+02 7.72683285e+02 5.33e-08 2.86e-11 2.27e-09 45s
## 16 7.72683305e+02 7.72683285e+02 5.84e-08 2.79e-11 2.17e-09 47s
## 17 7.72683304e+02 7.72683286e+02 6.34e-08 2.78e-11 2.10e-09 48s
##
## Barrier solved model in 17 iterations and 48.25 seconds
## Optimal objective 7.72683304e+02
##
## Crossover log...
##
## 1318 DPushes remaining with DInf 0.0000000e+00 49s
## 837 DPushes remaining with DInf 0.0000000e+00 50s
## 0 DPushes remaining with DInf 0.0000000e+00 52s
##
## 13 PPushes remaining with PInf 0.0000000e+00 52s
## 0 PPushes remaining with PInf 0.0000000e+00 52s
##
## Push phase complete: Pinf 0.0000000e+00, Dinf 3.7498105e-11 52s
##
## Iteration Objective Primal Inf. Dual Inf. Time
## 879 7.7268330e+02 0.000000e+00 0.000000e+00 53s
##
## Solved with barrier
## Solved in 879 iterations and 52.89 seconds
## Optimal objective 7.726832954e+02
## elapsed
## 54.086
system.time(quantile_lasso(x, y, tau, lambda, intercept=FALSE, standardize=FALSE, lp_solver="glpk", verbose=TRUE))[3]
## GLPK Simplex Optimizer, v4.65
## 6000 rows, 4000 columns, 4008000 non-zeros
## 0: obj = 0.000000000e+00 inf = 1.389e+03 (2000)
## 902: obj = 1.261033175e+02 inf = 9.526e+02 (1628) 6
## 1799: obj = 2.372101509e+02 inf = 7.887e+02 (1403) 6
## 2506: obj = 3.286998822e+02 inf = 6.978e+02 (1254) 5
## 3069: obj = 4.303857100e+02 inf = 6.312e+02 (1118) 4
## 3503: obj = 4.949763675e+02 inf = 5.856e+02 (1027) 4
## 3855: obj = 5.646858109e+02 inf = 5.498e+02 (943) 4
## 4214: obj = 6.384010480e+02 inf = 5.168e+02 (868) 3
## 4517: obj = 6.905085279e+02 inf = 4.876e+02 (821) 3
## 4828: obj = 7.415304760e+02 inf = 4.628e+02 (758) 3
## 5142: obj = 8.139067694e+02 inf = 4.304e+02 (693) 3
## 5382: obj = 8.512203322e+02 inf = 4.137e+02 (660) 2
## 5598: obj = 9.229966998e+02 inf = 3.940e+02 (621) 2
## 5821: obj = 1.036921843e+03 inf = 3.734e+02 (570) 2
## 6057: obj = 1.146800895e+03 inf = 3.544e+02 (528) 3
## 6259: obj = 1.195069124e+03 inf = 3.405e+02 (502) 2
## 6461: obj = 1.277382530e+03 inf = 3.279e+02 (475) 2
## 6663: obj = 1.329699884e+03 inf = 3.088e+02 (438) 2
## 6865: obj = 1.378378044e+03 inf = 2.890e+02 (411) 2
## 7068: obj = 1.491801888e+03 inf = 2.686e+02 (378) 2
## 7271: obj = 1.537529965e+03 inf = 2.578e+02 (361) 2
## 7473: obj = 1.639093252e+03 inf = 2.369e+02 (332) 2
## 7675: obj = 1.725325484e+03 inf = 2.179e+02 (309) 2
## 7877: obj = 1.786058881e+03 inf = 1.967e+02 (281) 2
## 8079: obj = 1.823474549e+03 inf = 1.793e+02 (262) 2
## 8283: obj = 1.912911114e+03 inf = 1.661e+02 (250) 2
## 8485: obj = 2.126382741e+03 inf = 1.433e+02 (223) 2
## 8687: obj = 2.316795534e+03 inf = 1.219e+02 (192) 2
## 8889: obj = 2.457797522e+03 inf = 1.053e+02 (170) 2
## 9092: obj = 2.577421908e+03 inf = 9.143e+01 (154) 2
## 9295: obj = 2.724050611e+03 inf = 8.163e+01 (143) 2
## 9497: obj = 2.884221159e+03 inf = 6.836e+01 (120) 2
## 9654: obj = 2.976613857e+03 inf = 2.451e-12 (0)
## * 9817: obj = 2.620728391e+03 inf = 2.415e-12 (997) 2
## * 10019: obj = 2.272835509e+03 inf = 2.136e-12 (998) 2
## * 10222: obj = 2.131841916e+03 inf = 1.918e-12 (997) 2
## * 10424: obj = 1.917507538e+03 inf = 1.977e-12 (999) 2
## * 10626: obj = 1.753117061e+03 inf = 2.152e-12 (999) 2
## * 10828: obj = 1.638027390e+03 inf = 2.097e-12 (1000) 2
## * 11030: obj = 1.564930429e+03 inf = 2.130e-12 (988) 2
## * 11232: obj = 1.525762894e+03 inf = 1.686e-12 (993) 2
## * 11434: obj = 1.463913401e+03 inf = 1.408e-12 (1000) 2
## * 11636: obj = 1.409314079e+03 inf = 1.704e-12 (999) 2
## * 11841: obj = 1.359087832e+03 inf = 1.388e-12 (996) 2
## * 12073: obj = 1.290525067e+03 inf = 1.200e-12 (991) 2
## * 12263: obj = 1.258146092e+03 inf = 1.379e-12 (986) 2
## * 12445: obj = 1.243782098e+03 inf = 1.268e-12 (995) 2
## * 12647: obj = 1.211280664e+03 inf = 1.350e-12 (985) 2
## * 12871: obj = 1.169295814e+03 inf = 1.488e-12 (999) 2
## * 13118: obj = 1.140007881e+03 inf = 1.296e-12 (990) 2
## * 13312: obj = 1.126658036e+03 inf = 1.225e-12 (970) 2
## * 13493: obj = 1.112511590e+03 inf = 1.502e-12 (998) 2
## * 13708: obj = 1.090967315e+03 inf = 1.113e-12 (992) 2
## * 13955: obj = 1.055292294e+03 inf = 1.157e-12 (995) 2
## * 14164: obj = 1.035592732e+03 inf = 1.135e-12 (989) 3
## * 14409: obj = 1.023906445e+03 inf = 1.264e-12 (994) 2
## * 14671: obj = 9.991204762e+02 inf = 1.076e-12 (984) 3
## * 14974: obj = 9.664307124e+02 inf = 8.334e-13 (999) 3
## * 15278: obj = 9.489398820e+02 inf = 8.855e-13 (987) 3
## * 15579: obj = 9.318908788e+02 inf = 1.032e-12 (984) 2
## * 15783: obj = 9.160952621e+02 inf = 8.448e-13 (998) 3
## * 16088: obj = 8.989395528e+02 inf = 5.279e-13 (978) 3
## * 16391: obj = 8.891247034e+02 inf = 7.406e-13 (959) 3
## * 16694: obj = 8.794072565e+02 inf = 7.189e-13 (995) 3
## * 16997: obj = 8.683929594e+02 inf = 6.368e-13 (930) 3
## * 17305: obj = 8.609563255e+02 inf = 6.554e-13 (982) 3
## * 17608: obj = 8.480629115e+02 inf = 3.755e-13 (963) 3
## * 17946: obj = 8.364987251e+02 inf = 4.366e-13 (968) 3
## * 18282: obj = 8.293034614e+02 inf = 7.352e-13 (974) 3
## * 18554: obj = 8.245119750e+02 inf = 8.340e-13 (979) 3
## * 18859: obj = 8.146597446e+02 inf = 4.884e-13 (990) 3
## * 19224: obj = 8.054090951e+02 inf = 4.898e-13 (836) 3
## * 19532: obj = 8.008205206e+02 inf = 4.935e-13 (944) 4
## * 19937: obj = 7.920453854e+02 inf = 2.515e-13 (969) 4
## * 20344: obj = 7.874032660e+02 inf = 4.356e-13 (764) 4
## * 20750: obj = 7.813680481e+02 inf = 3.599e-13 (987) 4
## * 21183: obj = 7.772213370e+02 inf = 3.011e-13 (832) 4
## * 21643: obj = 7.736432273e+02 inf = 1.597e-13 (837) 4
## * 22085: obj = 7.726840208e+02 inf = 1.611e-13 (26) 5
## * 22103: obj = 7.726832954e+02 inf = 2.074e-13 (0)
## OPTIMAL LP SOLUTION FOUND
## elapsed
## 418.252
system.time(rq.lasso.fit(x, y, tau, lambda/n, intercept=FALSE, scalex=FALSE))[3]
## elapsed
## 122.177