Gradient Boosted Trees using XGBoost
```r
gbt(
  dataset, rvar, evar, type = "classification", lev = "",
  max_depth = 6, learning_rate = 0.3, min_split_loss = 0,
  min_child_weight = 1, subsample = 1, nrounds = 100,
  early_stopping_rounds = 10, nthread = 12, wts = "None",
  seed = NA, data_filter = "", envir = parent.frame(), ...
)
```
| Argument | Description |
|---|---|
| `dataset` | Dataset |
| `rvar` | The response variable in the model |
| `evar` | Explanatory variables in the model |
| `type` | Model type (i.e., "classification" or "regression") |
| `lev` | Level to use as the first column in prediction output |
| `max_depth` | Maximum 'depth' of tree |
| `learning_rate` | Learning rate (eta) |
| `min_split_loss` | Minimal improvement (gamma) |
| `min_child_weight` | Minimum number of instances allowed in each node |
| `subsample` | Subsample ratio of the training instances (0-1) |
| `nrounds` | Number of trees to create |
| `early_stopping_rounds` | Early stopping rule |
| `nthread` | Number of parallel threads to use. Defaults to 12 if available |
| `wts` | Weights to use in estimation |
| `seed` | Random seed to use as the starting point |
| `data_filter` | Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000") |
| `envir` | Environment to extract data from |
| `...` | Further arguments to pass to xgboost |
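To make the arguments above concrete, here is a minimal sketch of a call that changes the main xgboost hyperparameters, assuming the radiant.model package (which provides `gbt`) is loaded and using the same titanic example data as the examples below. The parameter values are arbitrary, chosen only to illustrate the interface, not tuned recommendations.

```r
library(radiant.model)

## illustrative only: shallower trees, lower learning rate, row subsampling,
## and a fixed seed; values are arbitrary, not tuned recommendations
result <- gbt(
  titanic, rvar = "survived", evar = c("pclass", "sex"),
  type = "classification", lev = "Yes",
  max_depth = 3, learning_rate = 0.1, subsample = 0.8,
  nrounds = 200, early_stopping_rounds = 10, seed = 1234
)
```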
gbt returns a list with all variables defined in the function call, as an object of class gbt.
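A minimal sketch of inspecting that returned object; the element names used here (`nr_obs`, `model`, `importance`) appear in the `str()` output shown in the examples below.

```r
result <- gbt(titanic, "survived", c("pclass", "sex"), lev = "Yes")
class(result)            # "gbt" "model" "list"
result$nr_obs            # number of observations used in estimation
result$model             # the underlying xgb.Booster object
result$model$importance  # Gain / Cover / Frequency by feature (data.table)
```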
See https://radiant-rstats.github.io/docs/model/gbt.html for an example in Radiant
See also:

- `summary.gbt` to summarize results
- `plot.gbt` to plot results
- `predict.gbt` for prediction
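As a hedged illustration of how these methods fit together: the `pred_data` argument below follows the convention used by other radiant.model predict methods and should be checked against `?predict.gbt`.

```r
## sketch of the estimate -> summarize -> predict workflow
result <- gbt(titanic, "survived", c("pclass", "sex"), lev = "Yes")
summary(result)   # dispatches to summary.gbt

## predict.gbt: pred_data follows the radiant.model convention (assumption)
pred <- predict(result, pred_data = titanic)
head(pred)

# plot(result)    # dispatches to plot.gbt; see ?plot.gbt for available plot types
```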
Examples:

```r
if (FALSE) {
  gbt(titanic, "survived", c("pclass", "sex"), lev = "Yes") %>% summary()
  gbt(titanic, "survived", c("pclass", "sex")) %>% str()
}

gbt(titanic, "survived", c("pclass", "sex"), lev = "Yes", early_stopping_rounds = 0) %>%
  summary()
#> Gradient Boosted Trees (XGBoost)
#> Type                 : Classification
#> Data                 : titanic
#> Response variable    : survived
#> Level                : Yes in survived
#> Explanatory variables: pclass, sex
#> Max depth            : 6
#> Learning rate (eta)  : 0.3
#> Min split loss       : 0
#> Min child weight     : 1
#> Sub-sample           : 1
#> Nr of rounds (trees) : 100
#> Early stopping rounds: 0
#> Nr obs               : 1,043
#>
#> Iteration history:
#>
#> [1] train-auc:0.824504
#> [2] train-auc:0.824504
#> Stopping. Best iteration:
#> [1] train-auc:0.824504
```

Calling `str()` on the same estimation result shows the full list returned by `gbt`. That output runs to several hundred lines; the top-level structure is reproduced below with the components of the embedded `xgb.Booster` truncated:

```r
gbt(titanic, "survived", c("pclass", "sex"), lev = "Yes", early_stopping_rounds = 0) %>%
  str()
#> List of 26
#>  $ check                : chr ""
#>  $ model                :List of 15
#>   .. (xgb.Booster: handle, raw, best_iteration, best_ntreelimit, best_score,
#>   ..  best_msg, niter, evaluation_log, call, params, callbacks, feature_names,
#>   ..  nfeatures, importance, and the training data as a tibble [1,043 x 3])
#>  $ output               : chr [1:7] "[1]\ttrain-auc:0.824504 " ...
#>  $ check_args           :function (arg, default, inp = gbt_input)
#>  $ extra_args_names     : NULL
#>  $ extra_args           : list()
#>  $ gbt_input            :List of 10
#>  $ not_vary             : chr(0)
#>  $ nr_obs               : int 1043
#>  $ df_name              : chr "titanic"
#>  $ vars                 : chr [1:2] "pclass" "sex"
#>  $ rvar                 : chr "survived"
#>  $ evar                 : chr [1:2] "pclass" "sex"
#>  $ type                 : chr "classification"
#>  $ lev                  : chr "Yes"
#>  $ max_depth            : num 6
#>  $ learning_rate        : num 0.3
#>  $ min_split_loss       : num 0
#>  $ min_child_weight     : num 1
#>  $ subsample            : num 1
#>  $ nrounds              : num 100
#>  $ early_stopping_rounds: num 0
#>  $ nthread              : num 12
#>  $ wts                  : NULL
#>  $ seed                 : chr NA
#>  $ data_filter          : chr ""
#>  - attr(*, "class")= chr [1:3] "gbt" "model" "list"
```

A second `str()` dump in the original output comes from a run that passes a custom evaluation metric through `...` (eval_metric = "error@0.0833333333333333", with the default early_stopping_rounds = 10). Its structure is the same except that extra_args_names is "eval_metric" and the metric is recorded in both extra_args and gbt_input; xgboost then reports train-error@0.0833333 in the iteration history.

Estimating a regression model on the diamonds data:

```r
gbt(diamonds, "price", c("carat", "clarity"), type = "regression") %>% summary()
#> Gradient Boosted Trees (XGBoost)
#> Type                 : Regression
#> Data                 : diamonds
#> Response variable    : price
#> Explanatory variables: carat, clarity
#> Max depth            : 6
#> Learning rate (eta)  : 0.3
#> Min split loss       : 0
#> Min child weight     : 1
#> Sub-sample           : 1
#> Nr of rounds (trees) : 100
#> Early stopping rounds: 10
#> Nr obs               : 3,000
#>
#> Iteration history:
#>
#> [02:24:23] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
#>
#> [2] train-rmse:2934.489990
#> [3] train-rmse:2209.099365
#> [4] train-rmse:1731.832275
#> [5] train-rmse:1429.542236
#> [6] train-rmse:1236.558716
#> [7] train-rmse:1112.986206
#> [8] train-rmse:1041.411133
#> [9] train-rmse:994.494568
#> ...
#> [91] train-rmse:747.640930
#> [92] train-rmse:747.166992
#> [93] train-rmse:745.880615
#> [94] train-rmse:744.933716
#> [95] train-rmse:744.060425
#> [96] train-rmse:743.978699
#> [97] train-rmse:743.872742
#> [98] train-rmse:743.621277
#> [99] train-rmse:743.458862
#> [100] train-rmse:743.281799
```

A custom evaluation function can also be passed to xgboost through `...`:

```r
rig_wrap <- function(preds, dtrain) {
  labels <- xgboost::getinfo(dtrain, "label")
  value <- rig(preds, labels, lev = 1)
  list(metric = "rig", value = value)
}
gbt(titanic, "survived", c("pclass", "sex"), eval_metric = rig_wrap, maximize = TRUE) %>%
  str()
```

The resulting object again has the same 26-element structure; extra_args_names is now c("eval_metric", "maximize"), gbt_input grows to 11 elements (adding the metric function and the maximize flag), and the iteration history tracks train-rig, with a best score of 0.124 after 11 rounds.