Evaluate if sample data for a categorical variable is consistent with a hypothesized distribution

goodness(
  dataset,
  var,
  p = NULL,
  tab = NULL,
  data_filter = "",
  envir = parent.frame()
)

Arguments

dataset

Dataset

var

A categorical variable

p

Hypothesized distribution as a number, fraction, or numeric vector. If unspecified, defaults to an even distribution (i.e., equal probability for every level of var)

tab

Table of frequencies, provided as an alternative to dataset

data_filter

Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000")

envir

Environment to extract data from

Value

A list containing all variables used in goodness, returned as an object of class goodness

Details

See https://radiant-rstats.github.io/docs/basics/goodness.html for an example in Radiant

See also

summary.goodness to summarize results

plot.goodness to plot results

Examples

goodness(newspaper, "Income") %>% str()
#> List of 8
#>  $ elow       : int 0
#>  $ res        : tibble [1 × 4] (S3: tbl_df/tbl/data.frame)
#>   ..$ statistic: Named num 32.8
#>   .. ..- attr(*, "names")= chr "X-squared"
#>   ..$ p.value  : num 1e-08
#>   ..$ parameter: int 1
#>   ..$ method   : chr "Chi-squared test for given probabilities"
#>  $ cst        :List of 10
#>   ..$ statistic: Named num 32.8
#>   .. ..- attr(*, "names")= chr "X-squared"
#>   ..$ parameter: Named num 1
#>   .. ..- attr(*, "names")= chr "df"
#>   ..$ p.value  : num 1e-08
#>   ..$ method   : chr "Chi-squared test for given probabilities"
#>   ..$ data.name: chr "tab"
#>   ..$ observed : 'table' num [1:2(1d)] 359 221
#>   .. ..- attr(*, "dimnames")=List of 1
#>   .. .. ..$ : chr [1:2] "Low Income" "High Income"
#>   ..$ expected : Named num [1:2] 290 290
#>   .. ..- attr(*, "names")= chr [1:2] "Low Income" "High Income"
#>   ..$ residuals: 'table' num [1:2(1d)] 4.05 -4.05
#>   .. ..- attr(*, "dimnames")=List of 1
#>   .. .. ..$ : chr [1:2] "Low Income" "High Income"
#>   ..$ stdres   : 'table' num [1:2(1d)] 5.73 -5.73
#>   .. ..- attr(*, "dimnames")=List of 1
#>   .. .. ..$ : chr [1:2] "Low Income" "High Income"
#>   ..$ chi_sq   : 'table' num [1:2(1d)] 16.4 16.4
#>   .. ..- attr(*, "dimnames")=List of 1
#>   .. .. ..$ : chr [1:2] "Low Income" "High Income"
#>   ..- attr(*, "class")= chr "htest"
#>  $ df_name    : chr "newspaper"
#>  $ var        : chr "Income"
#>  $ p          : num [1:2] 0.5 0.5
#>  $ tab        : 'table' num [1:2(1d)] 359 221
#>   ..- attr(*, "dimnames")=List of 1
#>   .. ..$ : chr [1:2] "Low Income" "High Income"
#>  $ data_filter: chr ""
#>  - attr(*, "class")= chr [1:2] "goodness" "list"
goodness(newspaper, "Income", p = c(3/4, 1/4)) %>% str()
#> List of 8
#>  $ elow       : int 0
#>  $ res        : tibble [1 × 4] (S3: tbl_df/tbl/data.frame)
#>   ..$ statistic: Named num 53.1
#>   .. ..- attr(*, "names")= chr "X-squared"
#>   ..$ p.value  : num 3.15e-13
#>   ..$ parameter: int 1
#>   ..$ method   : chr "Chi-squared test for given probabilities"
#>  $ cst        :List of 10
#>   ..$ statistic: Named num 53.1
#>   .. ..- attr(*, "names")= chr "X-squared"
#>   ..$ parameter: Named num 1
#>   .. ..- attr(*, "names")= chr "df"
#>   ..$ p.value  : num 3.15e-13
#>   ..$ method   : chr "Chi-squared test for given probabilities"
#>   ..$ data.name: chr "tab"
#>   ..$ observed : 'table' num [1:2(1d)] 359 221
#>   .. ..- attr(*, "dimnames")=List of 1
#>   .. .. ..$ : chr [1:2] "Low Income" "High Income"
#>   ..$ expected : Named num [1:2] 435 145
#>   .. ..- attr(*, "names")= chr [1:2] "Low Income" "High Income"
#>   ..$ residuals: 'table' num [1:2(1d)] -3.64 6.31
#>   .. ..- attr(*, "dimnames")=List of 1
#>   .. .. ..$ : chr [1:2] "Low Income" "High Income"
#>   ..$ stdres   : 'table' num [1:2(1d)] -7.29 7.29
#>   .. ..- attr(*, "dimnames")=List of 1
#>   .. .. ..$ : chr [1:2] "Low Income" "High Income"
#>   ..$ chi_sq   : 'table' num [1:2(1d)] 13.3 39.8
#>   .. ..- attr(*, "dimnames")=List of 1
#>   .. .. ..$ : chr [1:2] "Low Income" "High Income"
#>   ..- attr(*, "class")= chr "htest"
#>  $ df_name    : chr "newspaper"
#>  $ var        : chr "Income"
#>  $ p          : num [1:2] 0.75 0.25
#>  $ tab        : 'table' num [1:2(1d)] 359 221
#>   ..- attr(*, "dimnames")=List of 1
#>   .. ..$ : chr [1:2] "Low Income" "High Income"
#>  $ data_filter: chr ""
#>  - attr(*, "class")= chr [1:2] "goodness" "list"
table(select(newspaper, Income)) %>% goodness(tab = .)
#> $elow
#> [1] 0
#>
#> $res
#> # A tibble: 1 x 4
#>   statistic      p.value parameter method
#>       <dbl>        <dbl>     <int> <chr>
#> 1      32.8 0.0000000100         1 Chi-squared test for given probabilities
#>
#> $cst
#>
#>     Chi-squared test for given probabilities
#>
#> data:  tab
#> X-squared = 32.834, df = 1, p-value = 1.003e-08
#>
#>
#> $df_name
#> [1] "."
#>
#> $var
#> [1] "variable"
#>
#> $p
#> [1] 0.5 0.5
#>
#> $tab
#>
#>  Low Income High Income
#>         359         221
#>
#> $data_filter
#> [1] ""
#>
#> attr(,"class")
#> [1] "goodness" "list"