Home > Blockchain >  R: How to Intersect multiple vectors that gives all possible combination
R: How to Intersect multiple vectors that gives all possible combination

Time:01-06

How to Intersect multiple vectors that gives all possible combination

Here is dummy data:

set1 <- c("g1", "g2","g3","g4")
set2 <- c("g1", "g2","g8")
set3 <- c("g17", "g4")
set4 <- c("g1", "g3")
set5 <- c("g5")

And is it possible to get all combination list? Tidyverse approach will be helpful.

#----Expected result----------------------------------

# set1 & set2 & set4
# "g1"

# set1 & set4
# "g2"

# set1 & set2
# "g2"

# set1 & set4
# "g3"

# set1 & set3
# "g4"

# set2
# "g8"

# set3
# "g8"

# set3
# "g17"

# set5
# "g5"

etc... I might missed some possible combination.

Thank you.

CodePudding user response:

l <- mget(ls(pattern = '^set\\d'))
map(seq(2, length(l)), ~combn(l, .x, \(x)
      list(reduce(x, intersect))%>%
        set_names(str_c(names(x), collapse = ' & ')),
      simplify = FALSE)) %>%
  unlist(FALSE) %>%
  unlist(FALSE) %>%
  keep(~length(.x)>0)

$`set1 & set2`
[1] "g1" "g2"

$`set1 & set3`
[1] "g4"

$`set1 & set4`
[1] "g1" "g3"

$`set2 & set4`
[1] "g1"

$`set1 & set2 & set4`
[1] "g1"

CodePudding user response:

I would suggest saving the sets in a list, then you could iterate over the elements of the list, e.g.:

sets2intersect <- list(set1, set2, set3,set4,set5)

lapply(unique(unlist(sets2intersect)), function(i){
  which(sapply(sets2intersect, function(x) any(i == x)))
})

[1]]
[1] 1 2 4

[[2]]
[1] 1 2

[[3]]
[1] 1 4

[[4]]
[1] 1 3

[[5]]
[1] 2

[[6]]
[1] 3

[[7]]
[1] 5

If you want to rename your list, to know which element was used, you can do:

result<- lapply(unique(unlist(sets2intersect)), function(i){
  which(sapply(sets2intersect, function(x) any(i == x)))
})
names(result) <- unique(unlist(sets2intersect))

$g1
[1] 1 2 4

$g2
[1] 1 2

$g3
[1] 1 4

$g4
[1] 1 3

$g8
[1] 2

$g17
[1] 3

$g5
[1] 5

CodePudding user response:

The answer by https://stackoverflow.com/users/15980284/jkupzig provides the non-empty set of intersections. If you want both empty and non-empty, you can use a similar approach for all possible

library(DescTools)
names_of <- c("set1", "set2", "set3", "set4", "set5")
twins <- CombSet(names_of, 2, repl=FALSE, ord=FALSE)
trios <- CombSet(names_of, 3, repl=FALSE, ord=FALSE)
quads <- CombSet(names_of, 4, repl=FALSE, ord=FALSE)
quint <- CombSet(names_of, 5, repl=FALSE, ord=FALSE)

quint
#>      [,1]   [,2]   [,3]   [,4]   [,5]  
#> [1,] "set1" "set2" "set3" "set4" "set5"
quads
#>      [,1]   [,2]   [,3]   [,4]  
#> [1,] "set1" "set2" "set3" "set4"
#> [2,] "set1" "set2" "set3" "set5"
#> [3,] "set1" "set2" "set4" "set5"
#> [4,] "set1" "set3" "set4" "set5"
#> [5,] "set2" "set3" "set4" "set5"
trios
#>       [,1]   [,2]   [,3]  
#>  [1,] "set1" "set2" "set3"
#>  [2,] "set1" "set2" "set4"
#>  [3,] "set1" "set2" "set5"
#>  [4,] "set1" "set3" "set4"
#>  [5,] "set1" "set3" "set5"
#>  [6,] "set1" "set4" "set5"
#>  [7,] "set2" "set3" "set4"
#>  [8,] "set2" "set3" "set5"
#>  [9,] "set2" "set4" "set5"
#> [10,] "set3" "set4" "set5"
twins
#>       [,1]   [,2]  
#>  [1,] "set1" "set2"
#>  [2,] "set1" "set3"
#>  [3,] "set1" "set4"
#>  [4,] "set1" "set5"
#>  [5,] "set2" "set3"
#>  [6,] "set2" "set4"
#>  [7,] "set2" "set5"
#>  [8,] "set3" "set4"
#>  [9,] "set3" "set5"
#> [10,] "set4" "set5"

CodePudding user response:

Building off the great answer from @Onyambu and whith help in the comments, I've added the members unique to each set using setdiff() which it appears was included in OP's desired output.

library(tidyverse)

set1 <- c("g1", "g2","g3","g4")
set2 <- c("g1", "g2","g8")
set3 <- c("g17", "g4")
set4 <- c("g1", "g3")
set5 <- c("g5")


l <- mget(ls(pattern = '^set\\d'))

map(2:length(l),
      ~ combn(l, .x, \(x)
              list(reduce(x, intersect)) %>%
                set_names(str_c(names(
                  x
                ), collapse = ' & ')),
              simplify = FALSE)) %>%
  unlist(FALSE) %>%
  unlist(FALSE) %>%
  c(.,
    map(seq_along(l), ~reduce(l[-.x], setdiff,.init = l[[.x]])) %>% 
      set_names(names(l))
    ) %>% 
  keep(~ length(.x) > 0)
#> $`set1 & set2`
#> [1] "g1" "g2"
#> 
#> $`set1 & set3`
#> [1] "g4"
#> 
#> $`set1 & set4`
#> [1] "g1" "g3"
#> 
#> $`set2 & set4`
#> [1] "g1"
#> 
#> $`set1 & set2 & set4`
#> [1] "g1"
#> 
#> $set2
#> [1] "g8"
#> 
#> $set3
#> [1] "g17"
#> 
#> $set5
#> [1] "g5"

Created on 2022-01-06 by the reprex package (v2.0.1)

  •  Tags:  
  • Related