\[ \min_{\beta \in \mathbb{R}^{p+1}} \left[ \frac{1}{2N} \sum_{i=1}^N \left( y_i - x_i\beta \right)^2 + \lambda ||\beta||_{1} \right] \]
ames <- AmesHousing::make_ames()
set.seed(4595)
data_split <- rsample::initial_split(ames, strata='Sale_Price')
ames_train <- rsample::training(data_split)
ames_test <- rsample::testing(data_split)
theFormula <- Sale_Price ~ Longitude + Latitude + Lot_Frontage + Lot_Area + Street + Utilities + Bldg_Type + Overall_Qual + Year_Built + Foundation + Exter_Cond + Heating + Central_Air + First_Flr_SF + Second_Flr_SF + Full_Bath + Half_Bath + Kitchen_Qual + TotRms_AbvGrd + Garage_Type + Garage_Cars + Open_Porch_SF + Pool_Area + Fence
basic_rec <- recipe(theFormula, data = ames_train) %>%
# collapse some levels into "other"
step_other(all_nominal(), threshold = 0.01) %>%
# remove variables with zero variance
step_zv(all_predictors()) %>%
# center and scale numeric variables
step_center(all_numeric()) %>% step_scale(all_numeric()) %>%
# turn categoricals into dummies, don't drop baseline
step_dummy(all_nominal(), one_hot=TRUE) %>%
# ensure there is an intercept
step_intercept()
prepped <- basic_rec %>% prep(training=ames_train, retain=TRUE)
prepped
Data Recipe
Inputs:
role #variables
outcome 1
predictor 24
Training data contained 2199 data points and no missing data.
Operations:
Collapsing factor levels for Street, Utilities, ... [trained]
Zero variance filter removed no terms [trained]
Centering for Longitude, Latitude, ... [trained]
Scaling for Longitude, Latitude, ... [trained]
Dummy variables from Street, Utilities, Bldg_Type, ... [trained]
Adding intercept
Many Ways to Compute
# train matrices
x_train <- prepped %>% juice(all_predictors(), composition='dgCMatrix')
y_train <- prepped %>% juice(all_outcomes(), composition='matrix')
mod_glmnet <- glmnet(x=x_train, y=y_train, family='gaussian',
alpha=1, intercept=FALSE, standardize=FALSE)
plot(mod_glmnet, xvar='lambda')
coefplot::coefpath(mod_glmnet)
coefplot::coefplot(mod_glmnet, sort='magnitude', lambda=0.0004)
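The penalty of 0.0004 used with coefplot() above, and again later for prediction, is picked by inspection. A common alternative is to choose it by cross-validation; a minimal sketch, not part of the original code, using cv.glmnet on the same training matrices:
# cross-validate along the lambda path to pick a penalty
cv_glmnet <- glmnet::cv.glmnet(x=x_train, y=y_train, family='gaussian',
                               alpha=1, intercept=FALSE, standardize=FALSE,
                               nfolds=10)
cv_glmnet$lambda.min   # lambda with the lowest cross-validated error
cv_glmnet$lambda.1se   # largest lambda within one standard error of the minimum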
# train matrices
x_train <- prepped %>% juice(all_predictors(), composition='dgCMatrix')
y_train <- prepped %>% juice(all_outcomes(), composition='matrix')
xg_train <- xgb.DMatrix(data=x_train, label=y_train)
mod_xgboost <- xgb.train(
data=xg_train,
booster='gblinear',
objective='reg:linear',
alpha=0.0004,
nrounds=20
)
coefplot(mod_xgboost, sort='magnitude')
coefplot(mod_xgboost, sort='magnitude', zero_threshold=0.01)
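Besides coefplot, the fitted weights of a gblinear booster can be pulled straight from the model; a minimal sketch, assuming the mod_xgboost object above, using xgb.importance(), which for linear boosters reports a weight per feature:
# for linear boosters this returns a Feature/Weight table
xgboost::xgb.importance(feature_names=colnames(x_train), model=mod_xgboost)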
# train matrices
x_train_dense <- prepped %>% juice(all_predictors(), composition='matrix')
y_train_dense <- prepped %>% juice(all_outcomes(), composition='matrix')
mod_lars <- lars(x=x_train_dense, y=y_train_dense,
type='lasso',
normalize=FALSE, intercept=FALSE)
coef(mod_lars, s=50) %>% as.data.frame() %>% setNames('Value') %>%
tibble::rownames_to_column('Coefficient') %>% filter(Value != 0) %>%
mutate(HighInner=NA_real_, LowInner=NA_real_,
HighOuter=NA_real_, LowOuter=NA_real_, Model='model') %>%
mutate(Coefficient=reorder(Coefficient, Value)) %>% coefplot()
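Note that in coef() and predict() for a lars fit, s=50 is read in the default mode="step", i.e. the 50th step along the path, not a penalty value. A minimal sketch (not in the original code) of indexing the path by the L1 norm instead:
# mode='fraction' indexes the path by ||beta||_1 relative to the full least-squares fit
coef(mod_lars, s=0.5, mode='fraction')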
stan_recipe <- recipe(theFormula, data = ames_train) %>%
# collapse some levels into "other"
step_other(all_nominal(), threshold = 0.01) %>%
# remove variables with zero variance
step_zv(all_predictors()) %>%
# center and scale numeric variables
step_center(all_numeric()) %>% step_scale(all_numeric()) %>%
prep(training=ames_train, retain=TRUE)
train_stan <- stan_recipe %>% juice(composition='tibble')
mod_stan <- stan_glm(theFormula, data=train_stan,
family=gaussian, prior=lasso(), iter=1000)
mcmc_intervals(as.array(mod_stan))
gridExtra::grid.arrange(
mcmc_areas(as.array(mod_stan), pars=c('Longitude', 'Latitude')),
mcmc_areas(as.array(mod_stan),
pars=stringr::str_subset(names(mod_stan$coefficients), 'Overall_Qual')),
nrow=1)
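Beyond the interval and density plots, numeric posterior summaries come straight from rstanarm; a minimal sketch, assuming the mod_stan fit above:
# posterior medians of the coefficients
coef(mod_stan)
# 90% credible intervals for each coefficient
posterior_interval(mod_stan, prob=0.9)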
\[ \min_{\beta \in \mathbb{R}^{p+1}} \left[ \frac{1}{2N} \sum_{i=1}^N \left( y_i - x_i\beta \right)^2 + \lambda ||\beta||_{1} \right] \]
\[ \min_{\beta \in \mathbb{R}^{p+1}} \left[ ||Y - X\beta||_2^2 + \lambda||\beta||_1 \right] \]
\[ \min_{\beta \in \mathbb{R}^{p+1}} ||Y - X\beta||_2^2 \\ \text{subject to } ||\beta||_1 \le t \]
\[ (Y - X\beta)^T(Y - X\beta) \\ (Y^T - \beta^TX^T)(Y - X\beta) \\ Y^TY - Y^TX\beta - \beta^TX^TY + \beta^TX^TX\beta \\ Y^TY - 2Y^TX\beta + \beta^TX^TX\beta \\ Y^TY+ \beta^TX^TX\beta - 2Y^TX\beta \]
\[ \min_{x} \frac{1}{2} x^TVV^Tx + c^Tx \\ Ax = b \\ l \preceq x \preceq u \]
\[ \min_{\beta^+, \beta^-} \left[(\beta^+ - \beta^-)^TX^TX(\beta^+ - \beta^-) - 2Y^TX(\beta^+ - \beta^-) \right] \\ \text{subject to } \sum_{j=1}^p \left( \beta_j^+ + \beta_j^- \right) \le t \\ \beta^+ \succeq 0, \beta^- \succeq 0 \]
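Getting to this quadratic program from the constrained least-squares form relies on splitting each coefficient into nonnegative positive and negative parts; writing the substitution out shows why the L1 constraint becomes a plain sum:
\[ \beta_j = \beta_j^+ - \beta_j^-, \quad \beta_j^+ \ge 0, \ \beta_j^- \ge 0 \\ |\beta_j| = \beta_j^+ + \beta_j^- \quad \text{(at the optimum at most one of the pair is nonzero)} \\ ||\beta||_1 = \sum_{j=1}^p |\beta_j| = \sum_{j=1}^p \left( \beta_j^+ + \beta_j^- \right) \le t \]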
lasso.qp <- function(x, y, t)
{
    y <- as.vector(y)
    p <- ncol(x)
    # build X[I, -I, 0]: positive part, negative part, and a slack column
    Vn <- x %*% cbind(diag(p), -diag(p), 0)
    # compute -X'y, used for the linear term
    YX <- colSums(x * -y)
    # linear term: -2Y'X for beta+, +2Y'X for beta-, 0 for the slack variable
    Zn <- c(2*YX, -2*YX, 0)
    # OLS fit gives an upper bound for the coefficients
    mod <- lm.fit(x, y)
    # replace any NAs with the maximum of all coefs
    b_ols <- mod$coefficients %>% tidyr::replace_na(max(mod$coefficients, na.rm=TRUE))
    # upper bounds, equality constraint and its right-hand side
    u <- c(abs(b_ols), abs(b_ols), sum(abs(b_ols)))
    A <- matrix(c(rep(1, 2*p), 1), nrow=1)
    b <- c(min(t, sum(abs(b_ols))))
    # solve the quadratic program
    soln <- LowRankQP::LowRankQP(Vmat=sqrt(2)*t(Vn), dvec=Zn, Amat=A,
                                 bvec=b, uvec=u, method="LU")
    # recover beta = beta+ - beta-
    return(round(soln$alpha[1:p] - soln$alpha[(p+1):(2*p)], digits=5))
}
# train matrices
x_train_dense <- prepped %>% juice(all_predictors(), composition='matrix')
y_train_dense <- prepped %>% juice(all_outcomes(), composition='matrix')
mod_quad <- lasso.qp(x=x_train_dense, y=y_train_dense, t=5)
LowRankQP CONVERGED IN 27 ITERATIONS
Primal Feasibility = 6.6547305e-11
Dual Feasibility = 3.1086245e-14
Complementarity Value = 3.4538714e-08
Duality Gap = 3.4540108e-08
Termination Condition = 1.8830090e-11
tibble(Coefficient=colnames(x_train_dense), Value=mod_quad) %>%
filter(Value != 0) %>%
mutate(HighInner=NA_real_, LowInner=NA_real_,
HighOuter=NA_real_, LowOuter=NA_real_, Model='model') %>%
mutate(Coefficient=reorder(Coefficient, Value)) %>% coefplot()
# train matrices
x_train_dense <- prepped %>% juice(all_predictors(), composition='matrix')
y_train_dense <- prepped %>% juice(all_outcomes(), composition='matrix')
mod_keras <- keras_model_sequential() %>%
layer_dense(units = 1, activation = "linear",
kernel_regularizer=regularizer_l1(l=0.02),
input_shape = dim(x_train_dense)[2])
mod_keras
Model
________________________________________________________________________________
Layer (type)                        Output Shape                    Param #
================================================================================
dense_1 (Dense)                     (None, 1)                       64
================================================================================
Total params: 64
Trainable params: 64
Non-trainable params: 0
________________________________________________________________________________
mod_keras %>% compile(
loss = "mse",
optimizer = optimizer_rmsprop(),
metrics = list("mean_absolute_error")
)
history <- mod_keras %>%
fit(
x=x_train_dense,
y=y_train_dense,
epochs = 30,
batch_size=64,
validation_split = 0.1,
view_metrics=TRUE,
callbacks=list(
callback_early_stopping(monitor='val_loss',
patience=10),
callback_reduce_lr_on_plateau(monitor='val_loss',
factor=0.1,
patience=5),
callback_tensorboard("logs/run_lasso")
)
)
plot(history)
get_weights(mod_keras)[[1]] %>% as_data_frame() %>% rename(Value=V1) %>%
mutate(Coefficient=colnames(x_train_dense)) %>% filter(abs(Value) > 1E-3) %>%
mutate(HighInner=NA_real_, LowInner=NA_real_,
HighOuter=NA_real_, LowOuter=NA_real_, Model='model') %>%
mutate(Coefficient=reorder(Coefficient, Value)) %>% coefplot()
# test matrices
x_test <- prepped %>% bake(all_predictors(), composition='matrix', new_data=ames_test)
y_test <- prepped %>% bake(all_outcomes(), composition='matrix', new_data=ames_test)
predictions <- list(
glmnet=predict(mod_glmnet, newx=x_test, s=0.0004),
xgboost=predict(mod_xgboost, newdata=x_test[, mod_xgboost$feature_names]),
lars=predict(mod_lars, newx=x_test, s=50)$fit,
stan=predict(mod_stan, newdata=stan_recipe %>%
bake(new_data=ames_test, composition='tibble')),
quad=x_test %*% mod_quad,
keras=predict(mod_keras, x=x_test)
)
predictions <- predictions %>%
map(~data.frame(Pred=as.numeric(.))) %>%
map_df(. %>% mutate(Actual=as.numeric(y_test)) %>% mutate(.Resid=Actual-Pred),
.id='Model')
ggplot(predictions, aes(x=Actual, y=Pred, color=Model)) +
    geom_abline(color='grey50', linetype=2) +
    geom_point(alpha=1/2, shape=1, size=1) +
    geom_smooth() +
    facet_wrap(~Model) +
    theme(legend.position='none')
error_measures <- predictions %>%
    group_by(Model) %>%
    summarize(RMSE=sqrt(mean(.Resid^2))) %>%
    arrange(RMSE) %>%
    mutate(Model=reorder(Model, RMSE))
ggplot(error_measures, aes(x=Model, y=RMSE)) + geom_col(width=0.5, fill='lightblue')
mod_tidy_glmnet <- linear_reg(penalty=0.004) %>%
set_engine('glmnet') %>%
fit_xy(x_train_dense, y_train)
mod_tidy_spark <- linear_reg(penalty=0.004) %>%
set_engine('spark') %>%
fit(theFormula, data=train_stan)
mod_tidy_stan <- linear_reg(penalty=0.004) %>%
set_engine('stan') %>%
fit(theFormula, data=train_stan)
mod_tidy_keras <- linear_reg(penalty=0.004) %>%
set_engine('keras') %>%
fit_xy(x_train_dense, y_train)
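Whichever engine is chosen, parsnip exposes one prediction interface; a minimal sketch (not in the original slides) using the glmnet-backed fit above against the test matrix built earlier:
# predict() on a parsnip model returns a tibble with a .pred column
tidy_preds <- predict(mod_tidy_glmnet, new_data=as.data.frame(x_test))
head(tidy_preds$.pred)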
[1] "AmesHousing" "bayesplot" "coefplot" "distr" [5] "DT" "ggplot2" "glmnet" "here" [9] "keras" "knitr" "lars" "leaflet" [13] "leaflet.extras" "LowRankQP" "magrittr" "parsnip" [17] "purrr" "recipes" "rsample" "rstanarm" [21] "stringr" "tibble" "useful" "xgboost"
Thank You