I have a character vector v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre") and I would like to combine its elements to prepare a complete experimental design. I want to produce a data.frame in which each row is a set of n elements, with as many rows as possible.
# Reproduction script: two attempts at enumerating every arrangement of the
# six values in `v` over `n` slots.
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
n <- 12
## TEST 1: crashes R
# `tmp` has 6 rows and n columns; every column is a copy of `v`.
tmp <- data.frame(matrix(rep(v,n), ncol = n))
# expand.grid() crosses all n columns, i.e. 6^n rows. For n = 12 that is
# 2,176,782,336 rows, which is more than .Machine$integer.max.
expand.grid(tmp)
## TEST 2:
# NOTE(review): `nbslot` is not defined in this snippet; presumably it holds
# the same value as `n` -- confirm. With 12 slots this asks for
# choose(72, 12) (about 1.5e13) combinations, and the column count no longer
# fits in an integer, hence the error below.
temp = t(combn(rep(v,nbslot), nbslot))
#Error in matrix(r, nrow = len.r, ncol = count) :
# invalid 'ncol' value (too large or NA)
#In addition: Warning message:
#In combn(rep(v, nbslot), nbslot) :
# NAs introduced by coercion to integer range
This seems to work for n <- 8 but not for n <- 12. How can I overcome this issue?
CodePudding user response:
Perhaps this helps you. It contains all combinations of the 6 variables (at least 1, and up to all), for a total of 63:
# Build an indicator table with one row per non-empty subset of `vars`:
# a cell is 1 when the variable belongs to the subset and 0 otherwise.

# variables
vars <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
# combinations by number of variables: element m of `L` is a matrix whose
# columns are the index sets of size m
L <- lapply(seq_along(vars), function(m) {
  combn(x = seq_along(vars), m = m)
})
# total number of combinations
(S <- sum(vapply(L, ncol, integer(1)))) # 63
# turn every index set into a 0/1 membership vector over `vars`
L2 <- lapply(L, function(index_sets) {
  apply(index_sets, 2, function(chosen) {
    z <- rep(0, length(vars))
    z[chosen] <- 1
    z
  })
})
# a data.frame of the combinations (one row per subset)
df <- as.data.frame(t(do.call("cbind", L2)))
# label the columns with the variable names; without this they are V1..V6
names(df) <- vars
df
# piment aubergine carotte oignon chou pommeDeTerre
# 1 1 0 0 0 0 0
# 2 0 1 0 0 0 0
# 3 0 0 1 0 0 0
# 4 0 0 0 1 0 0
# 5 0 0 0 0 1 0
# 6 0 0 0 0 0 1
# 7 1 1 0 0 0 0
# 8 1 0 1 0 0 0
# 9 1 0 0 1 0 0
# 10 1 0 0 0 1 0
# 11 1 0 0 0 0 1
# 12 0 1 1 0 0 0
# 13 0 1 0 1 0 0
# 14 0 1 0 0 1 0
# 15 0 1 0 0 0 1
# 16 0 0 1 1 0 0
# 17 0 0 1 0 1 0
# 18 0 0 1 0 0 1
# 19 0 0 0 1 1 0
# 20 0 0 0 1 0 1
# 21 0 0 0 0 1 1
# 22 1 1 1 0 0 0
# 23 1 1 0 1 0 0
# 24 1 1 0 0 1 0
# 25 1 1 0 0 0 1
# 26 1 0 1 1 0 0
# 27 1 0 1 0 1 0
# 28 1 0 1 0 0 1
# 29 1 0 0 1 1 0
# 30 1 0 0 1 0 1
# 31 1 0 0 0 1 1
# 32 0 1 1 1 0 0
# 33 0 1 1 0 1 0
# 34 0 1 1 0 0 1
# 35 0 1 0 1 1 0
# 36 0 1 0 1 0 1
# 37 0 1 0 0 1 1
# 38 0 0 1 1 1 0
# 39 0 0 1 1 0 1
# 40 0 0 1 0 1 1
# 41 0 0 0 1 1 1
# 42 1 1 1 1 0 0
# 43 1 1 1 0 1 0
# 44 1 1 1 0 0 1
# 45 1 1 0 1 1 0
# 46 1 1 0 1 0 1
# 47 1 1 0 0 1 1
# 48 1 0 1 1 1 0
# 49 1 0 1 1 0 1
# 50 1 0 1 0 1 1
# 51 1 0 0 1 1 1
# 52 0 1 1 1 1 0
# 53 0 1 1 1 0 1
# 54 0 1 1 0 1 1
# 55 0 1 0 1 1 1
# 56 0 0 1 1 1 1
# 57 1 1 1 1 1 0
# 58 1 1 1 1 0 1
# 59 1 1 1 0 1 1
# 60 1 1 0 1 1 1
# 61 1 0 1 1 1 1
# 62 0 1 1 1 1 1
# 63 1 1 1 1 1 1
CodePudding user response:
You could use the combinations() function from the gtools package.
Here is an illustration with r = 5, but it also works with r = 12:
# Enumerate the size-5 selections from `v` in which an element may appear
# more than once and order does not matter (one selection per row).
# Arguments are named explicitly so that `v` cannot be mistaken for `r`, and
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
library(gtools)
combinations(n = length(v), r = 5, v = v, repeats.allowed = TRUE)
[,1] [,2] [,3] [,4] [,5]
[1,] "aubergine" "aubergine" "aubergine" "aubergine" "aubergine"
[2,] "aubergine" "aubergine" "aubergine" "aubergine" "carotte"
[3,] "aubergine" "aubergine" "aubergine" "aubergine" "chou"
[4,] "aubergine" "aubergine" "aubergine" "aubergine" "oignon"
[5,] "aubergine" "aubergine" "aubergine" "aubergine" "piment"
[6,] "aubergine" "aubergine" "aubergine" "aubergine" "pommeDeTerre"
[7,] "aubergine" "aubergine" "aubergine" "carotte" "carotte"
...
CodePudding user response:
Based on your desired rows and n, I assumed that you want sampling with replacement, so that every element can occur more than once. I also assume that, with an n of 12, you will never use all of the billions of possible combinations. So here is what my function does.
It returns a list of random samples, all of which are different from one another.
# Draw random samples (with replacement) from `options` until `rows`
# mutually distinct samples have been collected.
#
# Arguments:
#   options  values to draw from; passed as-is to sample().
#   build    list of samples collected so far (default: empty list). If it
#            already holds at least `rows` elements it is returned unchanged.
#   samples  number of draws in each sample.
#   rows     number of distinct samples wanted.
#
# Returns a list of vectors, each of length `samples`.
# Stops with an error when more new samples are requested than distinct
# samples exist, since that request could never be satisfied.
mysamples <- function(options, build = list(), samples, rows) {
  if (length(build) >= rows) {
    return(build)
  }

  # sample() treats a single number n >= 1 as the pool 1:n
  pool_size <- if (is.numeric(options) && length(options) == 1L &&
    options >= 1) {
    floor(options)
  } else {
    length(unique(options))
  }
  max_distinct <- pool_size^samples
  if (rows - length(unique(build)) > max_distinct) {
    stop(
      "cannot build ", rows, " distinct samples: only ", max_distinct,
      " distinct samples exist",
      call. = FALSE
    )
  }

  # Iterate rather than recurse so that a large `rows` cannot exhaust the
  # call stack.
  while (length(build) < rows) {
    build[[length(build) + 1]] <- sample(options, samples, replace = TRUE)
    # drop the new draw again if it repeats an earlier one
    build <- unique(build)
  }
  build
}
# Example calls; each result is auto-printed at top level. The draws are
# random, so the printed samples differ from run to run.
# Candidate values to draw from.
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
# note that trying all combinations first would be 11441304000 combinations!
# NOTE(review): the derivation of the figure above is not shown here -- confirm.
# 1000 distinct samples of 50 draws each
mysamples(options = v, samples = 50, rows = 1000)
# smaller sample allowing to show all results below
mysamples(options = v, samples = 5, rows = 10)
[[1]]
[1] "carotte" "aubergine" "piment" "pommeDeTerre" "pommeDeTerre"
[[2]]
[1] "carotte" "oignon" "aubergine" "chou" "oignon"
[[3]]
[1] "piment" "carotte" "chou" "pommeDeTerre" "carotte"
[[4]]
[1] "oignon" "oignon" "aubergine" "carotte" "pommeDeTerre"
[[5]]
[1] "oignon" "chou" "piment" "aubergine" "piment"
[[6]]
[1] "chou" "aubergine" "chou" "aubergine" "oignon"
[[7]]
[1] "chou" "aubergine" "carotte" "carotte" "carotte"
[[8]]
[1] "aubergine" "aubergine" "carotte" "carotte" "oignon"
[[9]]
[1] "carotte" "carotte" "carotte" "carotte" "aubergine"
[[10]]
[1] "piment" "aubergine" "aubergine" "chou" "oignon"
mysamples(options = v, samples = 50, rows = 2)
[[1]]
[1] "pommeDeTerre" "carotte" "aubergine" "aubergine" "pommeDeTerre" "oignon" "carotte" "aubergine" "pommeDeTerre" "chou" "chou" "carotte" "pommeDeTerre"
[14] "piment" "carotte" "oignon" "piment" "chou" "chou" "pommeDeTerre" "piment" "oignon" "carotte" "aubergine" "pommeDeTerre" "piment"
[27] "aubergine" "pommeDeTerre" "chou" "pommeDeTerre" "pommeDeTerre" "carotte" "oignon" "piment" "oignon" "piment" "chou" "pommeDeTerre" "carotte"
[40] "carotte" "oignon" "chou" "oignon" "pommeDeTerre" "chou" "oignon" "oignon" "oignon" "carotte" "chou"
[[2]]
[1] "aubergine" "piment" "oignon" "piment" "oignon" "oignon" "piment" "chou" "chou" "carotte" "chou" "pommeDeTerre" "piment"
[14] "chou" "chou" "piment" "aubergine" "pommeDeTerre" "chou" "aubergine" "chou" "piment" "carotte" "pommeDeTerre" "chou" "pommeDeTerre"
[27] "oignon" "pommeDeTerre" "piment" "oignon" "piment" "oignon" "carotte" "oignon" "pommeDeTerre" "oignon" "piment" "piment" "carotte"
[40] "piment" "aubergine" "chou" "oignon" "oignon" "pommeDeTerre" "oignon" "oignon" "aubergine" "piment" "aubergine"
