Utility functions for factors and compositional data
find_max.Rd
Utility functions for factors and compositional data.
Usage
compare_sets(x, y)
find_max(x)
find_min(x)
reclass(x, map, all = FALSE, allow_NA = FALSE)
redistribute(x, source, target = NULL)
Arguments
- x, y
any type for compare_sets, a matrix for find_max, find_min, and redistribute, a factor for reclass.
- map
a reclassification matrix with 2 columns (1st: original levels, 2nd: output levels mapped to original levels).
- all
logical, whether all levels from mapping matrix should be applied on the return object.
- allow_NA
logical, whether NAs are allowed as part of map.
- source
numeric or character, single column index for input matrix x.
- target
numeric or character, column index or indices for input matrix x.
Value
A matrix for compare_sets.
A data frame for find_max and find_min.
A reclassified factor for reclass.
A matrix for redistribute where the source column values are redistributed among the target columns proportionally.
Examples
## numeric vector
compare_sets(1:10, 8:15)
#> xlength ylength intersect union xbutnoty ybutnotx
#> labels 10 8 3 15 7 5
#> unique 10 8 3 15 7 5
## factor with 'zombie' labels
compare_sets(factor(1:10, levels=1:10), factor(8:15, levels=1:15))
#> xlength ylength intersect union xbutnoty ybutnotx
#> labels 10 15 10 15 0 5
#> unique 10 8 3 15 7 5
(mat <- matrix(rnorm(10*5), 10, 5))
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 1.1484116 -1.86301149 -0.09744510 -0.13399701 2.75541758
#> [2,] -1.8218177 -0.52201251 -0.93584735 -1.91008747 0.04653138
#> [3,] -0.2473253 -0.05260191 -0.01595031 -0.27923724 0.57770907
#> [4,] -0.2441996 0.54299634 -0.82678895 -0.31344598 0.11819487
#> [5,] -0.2827054 -0.91407483 -1.51239965 1.06730788 -1.91172049
#> [6,] -0.5536994 0.46815442 0.93536319 0.07003485 0.86208648
#> [7,] 0.6289820 0.36295126 0.17648861 -0.63912332 -0.24323674
#> [8,] 2.0650249 -1.30454355 0.24368546 -0.04996490 -0.20608719
#> [9,] -1.6309894 0.73777632 1.62354888 -0.25148344 0.01917759
#> [10,] 0.5124269 1.88850493 0.11203808 0.44479712 0.02956075
(m <- find_max(mat))
#> index value
#> 1 X5 2.75541758
#> 2 X5 0.04653138
#> 3 X5 0.57770907
#> 4 X2 0.54299634
#> 5 X4 1.06730788
#> 6 X3 0.93536319
#> 7 X1 0.62898204
#> 8 X1 2.06502490
#> 9 X3 1.62354888
#> 10 X2 1.88850493
## column indices
as.integer(m$index)
#> [1] 5 5 5 2 4 3 1 1 3 2
find_min(mat)
#> index value
#> 1 X2 -1.86301149
#> 2 X4 -1.91008747
#> 3 X4 -0.27923724
#> 4 X3 -0.82678895
#> 5 X5 -1.91172049
#> 6 X1 -0.55369938
#> 7 X4 -0.63912332
#> 8 X2 -1.30454355
#> 9 X1 -1.63098940
#> 10 X5 0.02956075
map <- cbind(c("a","b","c","d","e","f","g"),
c("A","B","B","C","D","D","E"))
#x <- factor(sample(map[1:6,1], 100, replace=TRUE), levels=map[,1])
x <- as.factor(sample(map[1:6,1], 100, replace=TRUE))
x[2] <- NA
table(x, reclass(x, map, all = FALSE), useNA="always")
#>
#> x A B C D <NA>
#> a 22 0 0 0 0
#> b 0 15 0 0 0
#> c 0 18 0 0 0
#> d 0 0 16 0 0
#> e 0 0 0 19 0
#> f 0 0 0 9 0
#> <NA> 0 0 0 0 1
table(x, reclass(x, map, all = TRUE), useNA="always")
#>
#> x A B C D E <NA>
#> a 22 0 0 0 0 0
#> b 0 15 0 0 0 0
#> c 0 18 0 0 0 0
#> d 0 0 16 0 0 0
#> e 0 0 0 19 0 0
#> f 0 0 0 9 0 0
#> <NA> 0 0 0 0 0 1
map[c(4, 7), 2] <- NA
table(x, reclass(x, map, all = FALSE, allow_NA = TRUE), useNA="always")
#>
#> x A B D <NA>
#> a 22 0 0 0
#> b 0 15 0 0
#> c 0 18 0 0
#> d 0 0 0 16
#> e 0 0 19 0
#> f 0 0 9 0
#> <NA> 0 0 0 1
table(x, reclass(x, map, all = TRUE, allow_NA = TRUE), useNA="always")
#>
#> x A B D <NA>
#> a 22 0 0 0
#> b 0 15 0 0
#> c 0 18 0 0
#> d 0 0 0 16
#> e 0 0 19 0
#> f 0 0 9 0
#> <NA> 0 0 0 1
(mat2 <- exp(mat) / rowSums(exp(mat)))
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0.15146605 0.007455399 0.04357593 0.04201190 0.75549072
#> [2,] 0.06902728 0.253232229 0.16741439 0.06319543 0.44713066
#> [3,] 0.14867992 0.180642369 0.18738603 0.14401016 0.33928152
#> [4,] 0.16325093 0.358699287 0.09116756 0.15232891 0.23455332
#> [5,] 0.17013039 0.090486058 0.04974306 0.65627407 0.03336642
#> [6,] 0.07043799 0.195700726 0.31224725 0.13142903 0.29018500
#> [7,] 0.32238898 0.247083922 0.20505237 0.09070872 0.13476601
#> [8,] 0.70420292 0.024227737 0.11394628 0.08495118 0.07267188
#> [9,] 0.02138002 0.228428375 0.55390615 0.08494151 0.11134394
#> [10,] 0.13925597 0.551362081 0.09330978 0.13014952 0.08592266
(rmat2 <- redistribute(mat2, source = 1, target = 2:4))
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0 0.01959212 0.11451364 0.11040352 0.75549072
#> [2,] 0 0.28935958 0.19129855 0.07221120 0.44713066
#> [3,] 0 0.23309524 0.24179705 0.18582619 0.33928152
#> [4,] 0 0.45594008 0.11588242 0.19362418 0.23455332
#> [5,] 0 0.10981357 0.06036801 0.79645199 0.03336642
#> [6,] 0 0.21726041 0.34664647 0.14590812 0.29018500
#> [7,] 0 0.39382403 0.32683045 0.14457951 0.13476601
#> [8,] 0 0.10069262 0.47357085 0.35306465 0.07267188
#> [9,] 0 0.23405957 0.56756100 0.08703548 0.11134394
#> [10,] 0 0.65045648 0.11008002 0.15354084 0.08592266
colMeans(mat2)
#> [1] 0.1960220 0.2137318 0.1817749 0.1580000 0.2504712
colMeans(rmat2)
#> [1] 0.0000000 0.2704094 0.2548548 0.2242646 0.2504712
stopifnot(abs(sum(mat2) - sum(rmat2)) < 10^-6)