Home > Back-end >  Count frequency of a character by group
Count frequency of a character by group

Time:01-21

I want to count reasons for infertility by group, however some people have more than one reason. What is the best way code this to present this in a table

ID<- c('id1','id2','id3','id4','id5','id6')
rsn <- c('endo','endo, male','male','unexplained','other','male, endo, other')
group<- c('group1','group2','group2','group3','group1','group3')
data <- data.frame(ID, rsn, group)

The output would be something like

                     group1 group2 group3 
endo                      1      1      1
male                      0      2      0
other                     1      0      1        
unexplained               0      0      1

CodePudding user response:

tidyverse

ID<- c('id1','id2','id3','id4','id5','id6')
rsn <- c('endo','endo, male','male','unexplained','other','male, endo, other')
group<- c('group1','group2','group2','group3','group1','group3')

data <- data.frame(ID, rsn, group)

library(tidyverse)

data %>%
  separate_rows(rsn) %>%
  pivot_wider(
    id_cols = rsn,
    names_from = group,
    values_from = group,
    values_fn = length,
    values_fill = 0
  )
#> # A tibble: 4 x 4
#>   rsn         group1 group2 group3
#>   <chr>        <int>  <int>  <int>
#> 1 endo             1      1      1
#> 2 male             0      2      1
#> 3 unexplained      0      0      1
#> 4 other            1      0      1

Created on 2022-01-20 by the reprex package (v2.0.1)

data.table

library(magrittr)
library(data.table)

setDT(data)[, 
            list(rsn = unlist(tstrsplit(rsn, split = ", "))), 
            by = list(ID, group)
            ] %>%
  dcast(data = ., formula = rsn ~ group, fun.aggregate = length)
#>            rsn group1 group2 group3
#> 1:        endo      1      1      1
#> 2:        male      0      2      1
#> 3:       other      1      0      1
#> 4: unexplained      0      0      1

Created on 2022-01-20 by the reprex package (v2.0.1)

CodePudding user response:

ID<- c('id1','id2','id3','id4','id5','id6')
rsn <- c('endo','endo, male','male','unexplained','other','male, endo, other')
group<- c('group1','group2','group2','group3','group1','group3')
data <- data.frame(ID, rsn, group)


library(data.table)

setDT(data)

dcast(data[, .(rsn = unlist(str_split(rsn, ", "))), by = group], rsn ~ group, value.var = "rsn", fun.aggregate = length)

           rsn group1 group2 group3
1:        endo      1      1      1
2:        male      0      2      1
3:       other      1      0      1
4: unexplained      0      0      1
  •  Tags:  
  • Related