Home > Mobile >  how to optimize the use of `expand.grid` or `combn` in R
how to optimize the use of `expand.grid` or `combn` in R

Time:01-22

I have a character vector v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre") and I would like to combine its elements to prepare a complete experimental design. So I want to produce a data.frame in which each row is a set of n elements, with as many rows as possible.

# Candidate levels to combine into the experimental design.
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
# Number of elements per row of the design.
n <- 12

## TEST 1: crashes R
# tmp has n columns, each holding the length(v) values of v, so expand.grid()
# attempts to build length(v)^n rows (6^12 = 2,176,782,336 here).
tmp <- data.frame(matrix(rep(v,n), ncol = n))
expand.grid(tmp)

## TEST 2: fails with the error quoted below
# NOTE(review): `nbslot` is not defined in this snippet; presumably it equals
# `n` -- confirm. combn() is asked to enumerate every size-nbslot subset of
# the repeated vector; per the warning below, that count exceeds the integer
# range.
temp = t(combn(rep(v,nbslot), nbslot))
#Error in matrix(r, nrow = len.r, ncol = count) : 
#  invalid 'ncol' value (too large or NA)
#In addition: Warning message:
#In combn(rep(v, nbslot), nbslot) :
#  NAs introduced by coercion to integer range

This seems to work for n <- 8 but not for n <- 12. How can I overcome this issue?

CodePudding user response:

Perhaps this helps you. It contains all combinations of the 6 variables (at least 1, and up to all), for a total of 63:

# variables
vars <- c("piment", "aubergine", "carotte", "oignon", "chou", "pommeDeTerre")

# combinations by number of variables: element m of L is a matrix with m rows
# whose columns are the index sets of size m drawn from seq_along(vars)
L <- lapply(seq_along(vars), FUN = function(m) {
  combn(x = seq_along(vars), m = m)
})

# total number of combinations (every non-empty subset of vars)
(S <- sum(vapply(L, ncol, integer(1)))) # 63

# turn each index set into a 0/1 indicator vector over vars
# (1 = variable is part of the combination)
L2 <- lapply(L, FUN = function(x) {
  apply(x, 2, function(y) {
    z <- rep(0, length(vars))
    z[y] <- 1
    z
  })
})

# a data.frame of the combinations: one row per combination,
# one column per variable
df <- as.data.frame(t(do.call("cbind", L2)))
# label the columns with the variable names; without this they default to
# V1, V2, ... and the indicator columns cannot be told apart
names(df) <- vars
df

#    piment aubergine carotte oignon chou pommeDeTerre
# 1       1         0       0      0    0            0
# 2       0         1       0      0    0            0
# 3       0         0       1      0    0            0
# 4       0         0       0      1    0            0
# 5       0         0       0      0    1            0
# 6       0         0       0      0    0            1
# 7       1         1       0      0    0            0
# 8       1         0       1      0    0            0
# 9       1         0       0      1    0            0
# 10      1         0       0      0    1            0
# 11      1         0       0      0    0            1
# 12      0         1       1      0    0            0
# 13      0         1       0      1    0            0
# 14      0         1       0      0    1            0
# 15      0         1       0      0    0            1
# 16      0         0       1      1    0            0
# 17      0         0       1      0    1            0
# 18      0         0       1      0    0            1
# 19      0         0       0      1    1            0
# 20      0         0       0      1    0            1
# 21      0         0       0      0    1            1
# 22      1         1       1      0    0            0
# 23      1         1       0      1    0            0
# 24      1         1       0      0    1            0
# 25      1         1       0      0    0            1
# 26      1         0       1      1    0            0
# 27      1         0       1      0    1            0
# 28      1         0       1      0    0            1
# 29      1         0       0      1    1            0
# 30      1         0       0      1    0            1
# 31      1         0       0      0    1            1
# 32      0         1       1      1    0            0
# 33      0         1       1      0    1            0
# 34      0         1       1      0    0            1
# 35      0         1       0      1    1            0
# 36      0         1       0      1    0            1
# 37      0         1       0      0    1            1
# 38      0         0       1      1    1            0
# 39      0         0       1      1    0            1
# 40      0         0       1      0    1            1
# 41      0         0       0      1    1            1
# 42      1         1       1      1    0            0
# 43      1         1       1      0    1            0
# 44      1         1       1      0    0            1
# 45      1         1       0      1    1            0
# 46      1         1       0      1    0            1
# 47      1         1       0      0    1            1
# 48      1         0       1      1    1            0
# 49      1         0       1      1    0            1
# 50      1         0       1      0    1            1
# 51      1         0       0      1    1            1
# 52      0         1       1      1    1            0
# 53      0         1       1      1    0            1
# 54      0         1       1      0    1            1
# 55      0         1       0      1    1            1
# 56      0         0       1      1    1            1
# 57      1         1       1      1    1            0
# 58      1         1       1      1    0            1
# 59      1         1       1      0    1            1
# 60      1         1       0      1    1            1
# 61      1         0       1      1    1            1
# 62      0         1       1      1    1            1
# 63      1         1       1      1    1            1

CodePudding user response:

You could use the combinations() function from the gtools package.

Here is an illustration with r = 5; it also works with r = 12:

library(gtools)
# n = size of the source set, r = number of elements per row, v = the source
# vector; repeats.allowed = TRUE lets an element appear several times in a row.
# Arguments are named explicitly so the call does not rely on positional
# matching around the named `r`.
combinations(n = length(v), r = 5, v = v, repeats.allowed = TRUE)
       [,1]           [,2]           [,3]           [,4]           [,5]          
  [1,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "aubergine"   
  [2,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "carotte"     
  [3,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "chou"        
  [4,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "oignon"      
  [5,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "piment"      
  [6,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "pommeDeTerre"
  [7,] "aubergine"    "aubergine"    "aubergine"    "carotte"      "carotte"     
...

CodePudding user response:

Based on your desired number of rows and your n, I assumed that you want to sample with replacement, so that every element can occur more than once. I also assume that, with an n of 12, you will never use all of the billions of possible combinations. So here is what my function does.

It gives you a set of random samples that are all different from one another (no sample is repeated).

#' Draw distinct random samples (with replacement) from a set of options
#'
#' Repeatedly draws `samples` elements from `options` with replacement and
#' keeps only draws not seen before, until `rows` distinct draws are collected.
#' Two draws count as the same only if they are identical element by element,
#' so order matters.
#'
#' @param options Vector of values to draw from. As with base `sample()`, a
#'   length-one numeric value `k` is interpreted as `1:k`.
#' @param build List of draws collected so far; normally left at its default.
#'   If it already holds at least `rows` elements it is returned unchanged.
#' @param samples Number of elements in each draw.
#' @param rows Number of distinct draws to return.
#' @return A list of `rows` vectors, each of length `samples`.
mysamples <- function(options, build = list(), samples, rows) {
  # Upper bound on how many distinct draws can ever exist. Asking for more
  # than this (beyond what `build` already holds) could never be satisfied
  # and would loop forever, so fail early instead.
  max_distinct <- length(unique(options))^samples
  if (length(build) < rows && rows > length(build) + max_distinct) {
    stop(
      "cannot build ", rows, " distinct samples: only ", max_distinct,
      " distinct draws of size ", samples, " exist",
      call. = FALSE
    )
  }

  # Iterate rather than recurse so a large `rows` cannot exhaust the call stack.
  while (length(build) < rows) {
    build[[length(build) + 1L]] <- sample(options, samples, replace = TRUE)
    # Drop the new draw again if it duplicates an earlier one.
    build <- unique(build)
  }
  build
}

# Values available for each position of a sample.
v <- c(
  "piment", "aubergine", "carotte",
  "oignon", "chou", "pommeDeTerre"
)

# Enumerating every combination first is impractical, so request 1000 samples
# of 50 draws each instead.
# NOTE(review): the figure quoted for the full enumeration is 11441304000
# combinations -- not verified here.
mysamples(v, samples = 50, rows = 1000)

# A small request whose complete result can be shown: 10 samples of 5 draws.
mysamples(v, samples = 5, rows = 10)

[[1]]
[1] "carotte"      "aubergine"    "piment"       "pommeDeTerre" "pommeDeTerre"

[[2]]
[1] "carotte"   "oignon"    "aubergine" "chou"      "oignon"   

[[3]]
[1] "piment"       "carotte"      "chou"         "pommeDeTerre" "carotte"     

[[4]]
[1] "oignon"       "oignon"       "aubergine"    "carotte"      "pommeDeTerre"

[[5]]
[1] "oignon"    "chou"      "piment"    "aubergine" "piment"   

[[6]]
[1] "chou"      "aubergine" "chou"      "aubergine" "oignon"   

[[7]]
[1] "chou"      "aubergine" "carotte"   "carotte"   "carotte"  

[[8]]
[1] "aubergine" "aubergine" "carotte"   "carotte"   "oignon"   

[[9]]
[1] "carotte"   "carotte"   "carotte"   "carotte"   "aubergine"

[[10]]
[1] "piment"    "aubergine" "aubergine" "chou"      "oignon" 


# Request two samples of 50 draws each.
mysamples(v, rows = 2, samples = 50)

[[1]]
 [1] "pommeDeTerre" "carotte"      "aubergine"    "aubergine"    "pommeDeTerre" "oignon"       "carotte"      "aubergine"    "pommeDeTerre" "chou"         "chou"         "carotte"      "pommeDeTerre"
[14] "piment"       "carotte"      "oignon"       "piment"       "chou"         "chou"         "pommeDeTerre" "piment"       "oignon"       "carotte"      "aubergine"    "pommeDeTerre" "piment"      
[27] "aubergine"    "pommeDeTerre" "chou"         "pommeDeTerre" "pommeDeTerre" "carotte"      "oignon"       "piment"       "oignon"       "piment"       "chou"         "pommeDeTerre" "carotte"     
[40] "carotte"      "oignon"       "chou"         "oignon"       "pommeDeTerre" "chou"         "oignon"       "oignon"       "oignon"       "carotte"      "chou"        

[[2]]
 [1] "aubergine"    "piment"       "oignon"       "piment"       "oignon"       "oignon"       "piment"       "chou"         "chou"         "carotte"      "chou"         "pommeDeTerre" "piment"      
[14] "chou"         "chou"         "piment"       "aubergine"    "pommeDeTerre" "chou"         "aubergine"    "chou"         "piment"       "carotte"      "pommeDeTerre" "chou"         "pommeDeTerre"
[27] "oignon"       "pommeDeTerre" "piment"       "oignon"       "piment"       "oignon"       "carotte"      "oignon"       "pommeDeTerre" "oignon"       "piment"       "piment"       "carotte"     
[40] "piment"       "aubergine"    "chou"         "oignon"       "oignon"       "pommeDeTerre" "oignon"       "oignon"       "aubergine"    "piment"       "aubergine"   
  •  Tags:  
  • Related