Classification and regression trees based on the rpart package

crtree(
  dataset,
  rvar,
  evar,
  type = "",
  lev = "",
  wts = "None",
  minsplit = 2,
  minbucket = round(minsplit/3),
  cp = 0.001,
  pcp = NA,
  nodes = NA,
  K = 10,
  seed = 1234,
  split = "gini",
  prior = NA,
  adjprob = TRUE,
  cost = NA,
  margin = NA,
  check = "",
  data_filter = "",
  envir = parent.frame()
)

Arguments

dataset

Dataset

rvar

The response variable in the model

evar

Explanatory variables in the model

type

Model type (i.e., "classification" or "regression")

lev

The level in the response variable defined as _success_

wts

Weights to use in estimation

minsplit

The minimum number of observations that must exist in a node in order for a split to be attempted.

minbucket

The minimum number of observations in any terminal <leaf> node. If only one of minbucket or minsplit is specified, the code either sets minsplit to minbucket*3 or minbucket to minsplit/3, as appropriate.

cp

Minimum proportion of root node deviance required for split (default = 0.001)

pcp

Complexity parameter to use for pruning

nodes

Maximum size of tree in number of nodes to return

K

Number of folds to use in cross-validation

seed

Random seed used for cross-validation

split

Splitting criterion to use (i.e., "gini" or "information")

prior

Adjust the initial probability for the selected level (e.g., set to .5 in unbalanced samples)

adjprob

Setting a prior will rescale the predicted probabilities. Set adjprob to TRUE to adjust the probabilities back to their original scale after estimation

cost

Cost for each treatment (e.g., mailing)

margin

Margin associated with a successful treatment (e.g., a purchase)

check

Optional estimation parameters (e.g., "standardize")

data_filter

Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000")

envir

Environment to extract data from

Value

A list with all variables defined in crtree as an object of class crtree

Details

See https://radiant-rstats.github.io/docs/model/crtree.html for an example in Radiant

See also

summary.crtree to summarize results

plot.crtree to plot results

predict.crtree for prediction

Examples

crtree(titanic, "survived", c("pclass", "sex"), lev = "Yes") %>% summary()
#> Classification tree #> Data : titanic #> Response variable : survived #> Level : Yes in survived #> Explanatory variables: pclass, sex #> Complexity parameter : 0.001 #> Minimum observations : 2 #> Nr obs : 1,043 #> #> node), split, n, loss, yval, (yprob) #> * denotes terminal node #> #> 1) root 1043 425 No (0.40747843 0.59252157) #> 2) sex=female 386 96 Yes (0.75129534 0.24870466) #> 4) pclass=1st,2nd 234 16 Yes (0.93162393 0.06837607) * #> 5) pclass=3rd 152 72 No (0.47368421 0.52631579) * #> 3) sex=male 657 135 No (0.20547945 0.79452055) *
result <- crtree(titanic, "survived", c("pclass", "sex")) %>% summary()
#> Classification tree #> Data : titanic #> Response variable : survived #> Level : Yes in survived #> Explanatory variables: pclass, sex #> Complexity parameter : 0.001 #> Minimum observations : 2 #> Nr obs : 1,043 #> #> node), split, n, loss, yval, (yprob) #> * denotes terminal node #> #> 1) root 1043 425 No (0.40747843 0.59252157) #> 2) sex=female 386 96 Yes (0.75129534 0.24870466) #> 4) pclass=1st,2nd 234 16 Yes (0.93162393 0.06837607) * #> 5) pclass=3rd 152 72 No (0.47368421 0.52631579) * #> 3) sex=male 657 135 No (0.20547945 0.79452055) *
result <- crtree(diamonds, "price", c("carat", "clarity"), type = "regression") %>% str()
#> List of 29 #> $ model :List of 18 #> ..$ frame :'data.frame': 41 obs. of 8 variables: #> .. ..$ var : chr [1:41] "carat" "carat" "carat" "<leaf>" ... #> .. ..$ n : int [1:41] 3000 1935 1392 976 416 543 385 158 1065 715 ... #> .. ..$ wt : num [1:41] 3000 1935 1392 976 416 ... #> .. ..$ dev : num [1:41] 4.70e+10 2.37e+09 3.80e+08 5.37e+07 8.61e+07 ... #> .. ..$ yval : num [1:41] 3907 1618 1050 779 1687 ... #> .. ..$ complexity: num [1:41] 0.608124 0.03408 0.005122 0.00032 0.000518 ... #> .. ..$ ncompete : int [1:41] 1 1 1 0 0 1 0 0 1 1 ... #> .. ..$ nsurrogate: int [1:41] 1 1 0 0 0 0 0 0 0 0 ... #> ..$ where : Named int [1:3000] 4 4 4 4 4 5 8 8 5 14 ... #> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ... #> ..$ call : language (function (formula, data, weights, subset, na.action = na.rpart, method, model = FALSE, x = FALSE, y = TRUE,| __truncated__ ... #> ..$ terms :Classes 'terms', 'formula' language price ~ carat + clarity #> .. .. ..- attr(*, "variables")= language list(price, carat, clarity) #> .. .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1 #> .. .. .. ..- attr(*, "dimnames")=List of 2 #> .. .. .. .. ..$ : chr [1:3] "price" "carat" "clarity" #> .. .. .. .. ..$ : chr [1:2] "carat" "clarity" #> .. .. ..- attr(*, "term.labels")= chr [1:2] "carat" "clarity" #> .. .. ..- attr(*, "order")= int [1:2] 1 1 #> .. .. ..- attr(*, "intercept")= int 1 #> .. .. ..- attr(*, "response")= int 1 #> .. .. ..- attr(*, ".Environment")=<environment: 0x10a71ec0> #> .. .. ..- attr(*, "predvars")= language list(price, carat, clarity) #> .. .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "factor" #> .. .. .. ..- attr(*, "names")= chr [1:3] "price" "carat" "clarity" #> ..$ cptable : num [1:21, 1:5] 0.6081 0.1838 0.0341 0.0299 0.0292 ... #> .. ..- attr(*, "dimnames")=List of 2 #> .. .. ..$ : chr [1:21] "1" "2" "3" "4" ... #> .. .. ..$ : chr [1:5] "CP" "nsplit" "rel error" "xerror" ... 
#> ..$ method : chr "anova" #> ..$ parms : NULL #> ..$ control :List of 9 #> .. ..$ minsplit : num 2 #> .. ..$ minbucket : num 1 #> .. ..$ cp : num 0.001 #> .. ..$ maxcompete : int 4 #> .. ..$ maxsurrogate : int 5 #> .. ..$ usesurrogate : int 2 #> .. ..$ surrogatestyle: int 0 #> .. ..$ maxdepth : int 30 #> .. ..$ xval : num 10 #> ..$ functions :List of 2 #> .. ..$ summary:function (yval, dev, wt, ylevel, digits) #> .. ..$ text :function (yval, dev, wt, ylevel, digits, n, use.n) #> ..$ numresp : int 1 #> ..$ splits : num [1:44, 1:5] 3000 3000 0 1935 1935 ... #> .. ..- attr(*, "dimnames")=List of 2 #> .. .. ..$ : chr [1:44] "carat" "clarity" "clarity" "carat" ... #> .. .. ..$ : chr [1:5] "count" "ncat" "improve" "index" ... #> ..$ csplit : int [1:21, 1:8] 1 3 3 3 2 1 1 1 1 1 ... #> ..$ variable.importance: Named num [1:2] 4.10e+10 5.97e+09 #> .. ..- attr(*, "names")= chr [1:2] "carat" "clarity" #> ..$ y : Named int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ... #> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ... #> ..$ ordered : Named logi [1:2] FALSE FALSE #> .. ..- attr(*, "names")= chr [1:2] "carat" "clarity" #> ..$ residuals : Named num [1:3000] -199 -129 -149 -73 301 ... #> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ... #> ..$ model : tibble [3,000 × 3] (S3: tbl_df/tbl/data.frame) #> .. ..$ price : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ... #> .. ..$ carat : num [1:3000] 0.32 0.34 0.3 0.35 0.4 0.6 0.88 0.93 0.51 1.01 ... #> .. ..$ clarity: Factor w/ 8 levels "I1","SI2","SI1",..: 5 3 4 6 4 7 3 3 6 2 ... #> .. ..- attr(*, "description")= chr "## Diamond prices\n\nPrices of 3,000 round cut diamonds\n\n### Description\n\nA dataset containing the prices a"| __truncated__ #> ..$ var_types : Named chr [1:3] "integer" "numeric" "factor" #> .. ..- attr(*, "names")= chr [1:3] "price" "carat" "clarity" #> ..- attr(*, "xlevels")=List of 1 #> .. ..$ clarity: chr [1:8] "I1" "SI2" "SI1" "VS2" ... 
#> ..- attr(*, "class")= chr "rpart" #> $ crtree_input:List of 6 #> ..$ formula:Class 'formula' language price ~ . #> .. .. ..- attr(*, ".Environment")=<environment: 0x10a71ec0> #> ..$ data : tibble [3,000 × 3] (S3: tbl_df/tbl/data.frame) #> .. ..$ price : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ... #> .. ..$ carat : num [1:3000] 0.32 0.34 0.3 0.35 0.4 0.6 0.88 0.93 0.51 1.01 ... #> .. ..$ clarity: Factor w/ 8 levels "I1","SI2","SI1",..: 5 3 4 6 4 7 3 3 6 2 ... #> .. ..- attr(*, "description")= chr "## Diamond prices\n\nPrices of 3,000 round cut diamonds\n\n### Description\n\nA dataset containing the prices a"| __truncated__ #> ..$ method : chr "anova" #> ..$ parms :List of 1 #> .. ..$ split: chr "gini" #> ..$ weights: NULL #> ..$ control:List of 9 #> .. ..$ minsplit : num 2 #> .. ..$ minbucket : num 1 #> .. ..$ cp : num 0.001 #> .. ..$ maxcompete : int 4 #> .. ..$ maxsurrogate : int 5 #> .. ..$ usesurrogate : int 2 #> .. ..$ surrogatestyle: int 0 #> .. ..$ maxdepth : int 30 #> .. ..$ xval : num 10 #> $ parms :List of 1 #> ..$ split: chr "gini" #> $ control :List of 9 #> ..$ minsplit : num 2 #> ..$ minbucket : num 1 #> ..$ cp : num 0.001 #> ..$ maxcompete : int 4 #> ..$ maxsurrogate : int 5 #> ..$ usesurrogate : int 2 #> ..$ surrogatestyle: int 0 #> ..$ maxdepth : int 30 #> ..$ xval : num 10 #> $ form : chr "price ~ . " #> $ method : chr "anova" #> $ rv : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ... 
#> $ not_vary : chr(0) #> $ df_name : chr "diamonds" #> $ vars : chr [1:2] "carat" "clarity" #> $ rvar : chr "price" #> $ evar : chr [1:2] "carat" "clarity" #> $ type : chr "regression" #> $ lev : chr "" #> $ wts : NULL #> $ minsplit : num 2 #> $ minbucket : num 1 #> $ cp : num 0.001 #> $ pcp : logi NA #> $ nodes : logi NA #> $ K : num 10 #> $ seed : num 1234 #> $ split : chr "gini" #> $ prior : logi NA #> $ adjprob : logi TRUE #> $ cost : logi NA #> $ margin : logi NA #> $ check : chr "" #> $ data_filter : chr "" #> - attr(*, "class")= chr [1:3] "crtree" "model" "list"