Utility functions for factors and compositional data
find_max.Rd
Utility functions for factors and compositional data.
Usage
compare_sets(x, y)
find_max(x)
find_min(x)
reclass(x, map, all = FALSE, allow_NA = FALSE)
redistribute(x, source, target = NULL)
Arguments
- x, y
any type for compare_sets, a matrix for find_max, find_min, and redistribute, a factor for reclass.
- map
a reclassification matrix with 2 columns (1st: original levels, 2nd: output levels mapped to original levels).
- all
logical, whether all levels from mapping matrix should be applied on the return object.
- allow_NA
logical, whether NAs are allowed as part of map.
- source
numeric or character, single column index for input matrix x.
- target
numeric or character, column index or indices for input matrix x.
Value
A matrix for compare_sets.
A data frame for find_max and find_min.
A reclassified factor for reclass.
A matrix for redistribute where the source column values are redistributed among the target columns proportionally.
Examples
## numeric vector
compare_sets(1:10, 8:15)
#> xlength ylength intersect union xbutnoty ybutnotx
#> labels 10 8 3 15 7 5
#> unique 10 8 3 15 7 5
## factor with 'zombie' labels
compare_sets(factor(1:10, levels=1:10), factor(8:15, levels=1:15))
#> xlength ylength intersect union xbutnoty ybutnotx
#> labels 10 15 10 15 0 5
#> unique 10 8 3 15 7 5
(mat <- matrix(rnorm(10*5), 10, 5))
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] -1.400043517 -0.55369938 0.46815442 0.9353632 0.07003485
#> [2,] 0.255317055 0.62898204 0.36295126 0.1764886 -0.63912332
#> [3,] -2.437263611 2.06502490 -1.30454355 0.2436855 -0.04996490
#> [4,] -0.005571287 -1.63098940 0.73777632 1.6235489 -0.25148344
#> [5,] 0.621552721 0.51242695 1.88850493 0.1120381 0.44479712
#> [6,] 1.148411606 -1.86301149 -0.09744510 -0.1339970 2.75541758
#> [7,] -1.821817661 -0.52201251 -0.93584735 -1.9100875 0.04653138
#> [8,] -0.247325302 -0.05260191 -0.01595031 -0.2792372 0.57770907
#> [9,] -0.244199607 0.54299634 -0.82678895 -0.3134460 0.11819487
#> [10,] -0.282705449 -0.91407483 -1.51239965 1.0673079 -1.91172049
(m <- find_max(mat))
#> index value
#> 1 X4 0.93536319
#> 2 X2 0.62898204
#> 3 X2 2.06502490
#> 4 X4 1.62354888
#> 5 X3 1.88850493
#> 6 X5 2.75541758
#> 7 X5 0.04653138
#> 8 X5 0.57770907
#> 9 X2 0.54299634
#> 10 X4 1.06730788
## column indices
as.integer(m$index)
#> [1] 4 2 2 4 3 5 5 5 2 4
find_min(mat)
#> index value
#> 1 X1 -1.4000435
#> 2 X5 -0.6391233
#> 3 X1 -2.4372636
#> 4 X2 -1.6309894
#> 5 X4 0.1120381
#> 6 X2 -1.8630115
#> 7 X4 -1.9100875
#> 8 X4 -0.2792372
#> 9 X3 -0.8267890
#> 10 X5 -1.9117205
map <- cbind(c("a","b","c","d","e","f","g"),
c("A","B","B","C","D","D","E"))
#x <- factor(sample(map[1:6,1], 100, replace=TRUE), levels=map[,1])
x <- as.factor(sample(map[1:6,1], 100, replace=TRUE))
x[2] <- NA
table(x, reclass(x, map, all = FALSE), useNA="always")
#>
#> x A B C D <NA>
#> a 20 0 0 0 0
#> b 0 17 0 0 0
#> c 0 16 0 0 0
#> d 0 0 18 0 0
#> e 0 0 0 18 0
#> f 0 0 0 10 0
#> <NA> 0 0 0 0 1
table(x, reclass(x, map, all = TRUE), useNA="always")
#>
#> x A B C D E <NA>
#> a 20 0 0 0 0 0
#> b 0 17 0 0 0 0
#> c 0 16 0 0 0 0
#> d 0 0 18 0 0 0
#> e 0 0 0 18 0 0
#> f 0 0 0 10 0 0
#> <NA> 0 0 0 0 0 1
map[c(4, 7), 2] <- NA
table(x, reclass(x, map, all = FALSE, allow_NA = TRUE), useNA="always")
#>
#> x A B D <NA>
#> a 20 0 0 0
#> b 0 17 0 0
#> c 0 16 0 0
#> d 0 0 0 18
#> e 0 0 18 0
#> f 0 0 10 0
#> <NA> 0 0 0 1
table(x, reclass(x, map, all = TRUE, allow_NA = TRUE), useNA="always")
#>
#> x A B D <NA>
#> a 20 0 0 0
#> b 0 17 0 0
#> c 0 16 0 0
#> d 0 0 0 18
#> e 0 0 18 0
#> f 0 0 10 0
#> <NA> 0 0 0 1
(mat2 <- exp(mat) / rowSums(exp(mat)))
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0.040831390 0.095182422 0.26444918 0.42193778 0.17759923
#> [2,] 0.204093071 0.296557494 0.22728627 0.18862250 0.08344066
#> [3,] 0.008346517 0.753050903 0.02590833 0.12185032 0.09084393
#> [4,] 0.108918579 0.021438374 0.22905181 0.55541789 0.08517334
#> [5,] 0.145234726 0.130220017 0.51558565 0.08725515 0.12170446
#> [6,] 0.151466054 0.007455399 0.04357593 0.04201190 0.75549072
#> [7,] 0.069027282 0.253232229 0.16741439 0.06319543 0.44713066
#> [8,] 0.148679922 0.180642369 0.18738603 0.14401016 0.33928152
#> [9,] 0.163250927 0.358699287 0.09116756 0.15232891 0.23455332
#> [10,] 0.170130393 0.090486058 0.04974306 0.65627407 0.03336642
(rmat2 <- redistribute(mat2, source = 1, target = 2:4))
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0 0.10015502 0.27826475 0.4439810 0.17759923
#> [2,] 0 0.38150935 0.29239469 0.2426553 0.08344066
#> [3,] 0 0.76002835 0.02614838 0.1229793 0.09084393
#> [4,] 0 0.02433577 0.26000819 0.6304827 0.08517334
#> [5,] 0 0.15601933 0.61773398 0.1045422 0.12170446
#> [6,] 0 0.01959212 0.11451364 0.1104035 0.75549072
#> [7,] 0 0.28935958 0.19129855 0.0722112 0.44713066
#> [8,] 0 0.23309524 0.24179705 0.1858262 0.33928152
#> [9,] 0 0.45594008 0.11588242 0.1936242 0.23455332
#> [10,] 0 0.10981357 0.06036801 0.7964520 0.03336642
colMeans(mat2)
#> [1] 0.1209979 0.2186965 0.1801568 0.2432904 0.2368584
colMeans(rmat2)
#> [1] 0.0000000 0.2529848 0.2198410 0.2903158 0.2368584
stopifnot(abs(sum(mat2) - sum(rmat2)) < 10^-6)