Classification and regression trees based on the rpart package
crtree( dataset, rvar, evar, type = "", lev = "", wts = "None", minsplit = 2, minbucket = round(minsplit/3), cp = 0.001, pcp = NA, nodes = NA, K = 10, seed = 1234, split = "gini", prior = NA, adjprob = TRUE, cost = NA, margin = NA, check = "", data_filter = "", envir = parent.frame() )
dataset | Dataset |
---|---|
rvar | The response variable in the model |
evar | Explanatory variables in the model |
type | Model type (i.e., "classification" or "regression") |
lev | The level in the response variable defined as _success_ |
wts | Weights to use in estimation |
minsplit | The minimum number of observations that must exist in a node in order for a split to be attempted. |
minbucket | The minimum number of observations in any terminal <leaf> node. If only one of minbucket or minsplit is specified, the code either sets minsplit to minbucket*3 or minbucket to minsplit/3, as appropriate. |
cp | Minimum proportion of root node deviance required for split (default = 0.001) |
pcp | Complexity parameter to use for pruning |
nodes | Maximum size of tree in number of nodes to return |
K | Number of folds used in cross-validation |
seed | Random seed used for cross-validation |
split | Splitting criterion to use (i.e., "gini" or "information") |
prior | Adjust the initial probability for the selected level (e.g., set to .5 in unbalanced samples) |
adjprob | Setting a prior will rescale the predicted probabilities. Set adjprob to TRUE to adjust the probabilities back to their original scale after estimation |
cost | Cost for each treatment (e.g., mailing) |
margin | Margin associated with a successful treatment (e.g., a purchase) |
check | Optional estimation parameters (e.g., "standardize") |
data_filter | Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000") |
envir | Environment to extract data from |
A list with all variables defined in crtree as an object of class crtree
See https://radiant-rstats.github.io/docs/model/crtree.html for an example in Radiant
summary.crtree
to summarize results
plot.crtree
to plot results
predict.crtree
for prediction
#> Classification tree #> Data : titanic #> Response variable : survived #> Level : Yes in survived #> Explanatory variables: pclass, sex #> Complexity parameter : 0.001 #> Minimum observations : 2 #> Nr obs : 1,043 #> #> node), split, n, loss, yval, (yprob) #> * denotes terminal node #> #> 1) root 1043 425 No (0.40747843 0.59252157) #> 2) sex=female 386 96 Yes (0.75129534 0.24870466) #> 4) pclass=1st,2nd 234 16 Yes (0.93162393 0.06837607) * #> 5) pclass=3rd 152 72 No (0.47368421 0.52631579) * #> 3) sex=male 657 135 No (0.20547945 0.79452055) *#> Classification tree #> Data : titanic #> Response variable : survived #> Level : Yes in survived #> Explanatory variables: pclass, sex #> Complexity parameter : 0.001 #> Minimum observations : 2 #> Nr obs : 1,043 #> #> node), split, n, loss, yval, (yprob) #> * denotes terminal node #> #> 1) root 1043 425 No (0.40747843 0.59252157) #> 2) sex=female 386 96 Yes (0.75129534 0.24870466) #> 4) pclass=1st,2nd 234 16 Yes (0.93162393 0.06837607) * #> 5) pclass=3rd 152 72 No (0.47368421 0.52631579) * #> 3) sex=male 657 135 No (0.20547945 0.79452055) *#> List of 29 #> $ model :List of 18 #> ..$ frame :'data.frame': 41 obs. of 8 variables: #> .. ..$ var : chr [1:41] "carat" "carat" "carat" "<leaf>" ... #> .. ..$ n : int [1:41] 3000 1935 1392 976 416 543 385 158 1065 715 ... #> .. ..$ wt : num [1:41] 3000 1935 1392 976 416 ... #> .. ..$ dev : num [1:41] 4.70e+10 2.37e+09 3.80e+08 5.37e+07 8.61e+07 ... #> .. ..$ yval : num [1:41] 3907 1618 1050 779 1687 ... #> .. ..$ complexity: num [1:41] 0.608124 0.03408 0.005122 0.00032 0.000518 ... #> .. ..$ ncompete : int [1:41] 1 1 1 0 0 1 0 0 1 1 ... #> .. ..$ nsurrogate: int [1:41] 1 1 0 0 0 0 0 0 0 0 ... #> ..$ where : Named int [1:3000] 4 4 4 4 4 5 8 8 5 14 ... #> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ... #> ..$ call : language (function (formula, data, weights, subset, na.action = na.rpart, method, model = FALSE, x = FALSE, y = TRUE,| __truncated__ ... 
#> ..$ terms :Classes 'terms', 'formula' language price ~ carat + clarity #> .. .. ..- attr(*, "variables")= language list(price, carat, clarity) #> .. .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1 #> .. .. .. ..- attr(*, "dimnames")=List of 2 #> .. .. .. .. ..$ : chr [1:3] "price" "carat" "clarity" #> .. .. .. .. ..$ : chr [1:2] "carat" "clarity" #> .. .. ..- attr(*, "term.labels")= chr [1:2] "carat" "clarity" #> .. .. ..- attr(*, "order")= int [1:2] 1 1 #> .. .. ..- attr(*, "intercept")= int 1 #> .. .. ..- attr(*, "response")= int 1 #> .. .. ..- attr(*, ".Environment")=<environment: 0x10a71ec0> #> .. .. ..- attr(*, "predvars")= language list(price, carat, clarity) #> .. .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "factor" #> .. .. .. ..- attr(*, "names")= chr [1:3] "price" "carat" "clarity" #> ..$ cptable : num [1:21, 1:5] 0.6081 0.1838 0.0341 0.0299 0.0292 ... #> .. ..- attr(*, "dimnames")=List of 2 #> .. .. ..$ : chr [1:21] "1" "2" "3" "4" ... #> .. .. ..$ : chr [1:5] "CP" "nsplit" "rel error" "xerror" ... #> ..$ method : chr "anova" #> ..$ parms : NULL #> ..$ control :List of 9 #> .. ..$ minsplit : num 2 #> .. ..$ minbucket : num 1 #> .. ..$ cp : num 0.001 #> .. ..$ maxcompete : int 4 #> .. ..$ maxsurrogate : int 5 #> .. ..$ usesurrogate : int 2 #> .. ..$ surrogatestyle: int 0 #> .. ..$ maxdepth : int 30 #> .. ..$ xval : num 10 #> ..$ functions :List of 2 #> .. ..$ summary:function (yval, dev, wt, ylevel, digits) #> .. ..$ text :function (yval, dev, wt, ylevel, digits, n, use.n) #> ..$ numresp : int 1 #> ..$ splits : num [1:44, 1:5] 3000 3000 0 1935 1935 ... #> .. ..- attr(*, "dimnames")=List of 2 #> .. .. ..$ : chr [1:44] "carat" "clarity" "clarity" "carat" ... #> .. .. ..$ : chr [1:5] "count" "ncat" "improve" "index" ... #> ..$ csplit : int [1:21, 1:8] 1 3 3 3 2 1 1 1 1 1 ... #> ..$ variable.importance: Named num [1:2] 4.10e+10 5.97e+09 #> .. 
..- attr(*, "names")= chr [1:2] "carat" "clarity" #> ..$ y : Named int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ... #> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ... #> ..$ ordered : Named logi [1:2] FALSE FALSE #> .. ..- attr(*, "names")= chr [1:2] "carat" "clarity" #> ..$ residuals : Named num [1:3000] -199 -129 -149 -73 301 ... #> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ... #> ..$ model : tibble [3,000 × 3] (S3: tbl_df/tbl/data.frame) #> .. ..$ price : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ... #> .. ..$ carat : num [1:3000] 0.32 0.34 0.3 0.35 0.4 0.6 0.88 0.93 0.51 1.01 ... #> .. ..$ clarity: Factor w/ 8 levels "I1","SI2","SI1",..: 5 3 4 6 4 7 3 3 6 2 ... #> .. ..- attr(*, "description")= chr "## Diamond prices\n\nPrices of 3,000 round cut diamonds\n\n### Description\n\nA dataset containing the prices a"| __truncated__ #> ..$ var_types : Named chr [1:3] "integer" "numeric" "factor" #> .. ..- attr(*, "names")= chr [1:3] "price" "carat" "clarity" #> ..- attr(*, "xlevels")=List of 1 #> .. ..$ clarity: chr [1:8] "I1" "SI2" "SI1" "VS2" ... #> ..- attr(*, "class")= chr "rpart" #> $ crtree_input:List of 6 #> ..$ formula:Class 'formula' language price ~ . #> .. .. ..- attr(*, ".Environment")=<environment: 0x10a71ec0> #> ..$ data : tibble [3,000 × 3] (S3: tbl_df/tbl/data.frame) #> .. ..$ price : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ... #> .. ..$ carat : num [1:3000] 0.32 0.34 0.3 0.35 0.4 0.6 0.88 0.93 0.51 1.01 ... #> .. ..$ clarity: Factor w/ 8 levels "I1","SI2","SI1",..: 5 3 4 6 4 7 3 3 6 2 ... #> .. ..- attr(*, "description")= chr "## Diamond prices\n\nPrices of 3,000 round cut diamonds\n\n### Description\n\nA dataset containing the prices a"| __truncated__ #> ..$ method : chr "anova" #> ..$ parms :List of 1 #> .. ..$ split: chr "gini" #> ..$ weights: NULL #> ..$ control:List of 9 #> .. ..$ minsplit : num 2 #> .. ..$ minbucket : num 1 #> .. ..$ cp : num 0.001 #> .. 
..$ maxcompete : int 4 #> .. ..$ maxsurrogate : int 5 #> .. ..$ usesurrogate : int 2 #> .. ..$ surrogatestyle: int 0 #> .. ..$ maxdepth : int 30 #> .. ..$ xval : num 10 #> $ parms :List of 1 #> ..$ split: chr "gini" #> $ control :List of 9 #> ..$ minsplit : num 2 #> ..$ minbucket : num 1 #> ..$ cp : num 0.001 #> ..$ maxcompete : int 4 #> ..$ maxsurrogate : int 5 #> ..$ usesurrogate : int 2 #> ..$ surrogatestyle: int 0 #> ..$ maxdepth : int 30 #> ..$ xval : num 10 #> $ form : chr "price ~ . " #> $ method : chr "anova" #> $ rv : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ... #> $ not_vary : chr(0) #> $ df_name : chr "diamonds" #> $ vars : chr [1:2] "carat" "clarity" #> $ rvar : chr "price" #> $ evar : chr [1:2] "carat" "clarity" #> $ type : chr "regression" #> $ lev : chr "" #> $ wts : NULL #> $ minsplit : num 2 #> $ minbucket : num 1 #> $ cp : num 0.001 #> $ pcp : logi NA #> $ nodes : logi NA #> $ K : num 10 #> $ seed : num 1234 #> $ split : chr "gini" #> $ prior : logi NA #> $ adjprob : logi TRUE #> $ cost : logi NA #> $ margin : logi NA #> $ check : chr "" #> $ data_filter : chr "" #> - attr(*, "class")= chr [1:3] "crtree" "model" "list"