I want to count reasons for infertility by group, however some people have more than one reason. What is the best way code this to present this in a table
ID<- c('id1','id2','id3','id4','id5','id6')
rsn <- c('endo','endo, male','male','unexplained','other','male, endo, other')
group<- c('group1','group2','group2','group3','group1','group3')
data <- data.frame(ID, rsn, group)
The output would be something like
group1 group2 group3
endo 1 1 1
male 0 2 0
other 1 0 1
unexplained 0 0 1
CodePudding user response:
tidyverse
ID<- c('id1','id2','id3','id4','id5','id6')
rsn <- c('endo','endo, male','male','unexplained','other','male, endo, other')
group<- c('group1','group2','group2','group3','group1','group3')
data <- data.frame(ID, rsn, group)
library(tidyverse)
data %>%
separate_rows(rsn) %>%
pivot_wider(
id_cols = rsn,
names_from = group,
values_from = group,
values_fn = length,
values_fill = 0
)
#> # A tibble: 4 x 4
#> rsn group1 group2 group3
#> <chr> <int> <int> <int>
#> 1 endo 1 1 1
#> 2 male 0 2 1
#> 3 unexplained 0 0 1
#> 4 other 1 0 1
Created on 2022-01-20 by the reprex package (v2.0.1)
data.table
library(magrittr)
library(data.table)
setDT(data)[,
list(rsn = unlist(tstrsplit(rsn, split = ", "))),
by = list(ID, group)
] %>%
dcast(data = ., formula = rsn ~ group, fun.aggregate = length)
#> rsn group1 group2 group3
#> 1: endo 1 1 1
#> 2: male 0 2 1
#> 3: other 1 0 1
#> 4: unexplained 0 0 1
Created on 2022-01-20 by the reprex package (v2.0.1)
CodePudding user response:
ID<- c('id1','id2','id3','id4','id5','id6')
rsn <- c('endo','endo, male','male','unexplained','other','male, endo, other')
group<- c('group1','group2','group2','group3','group1','group3')
data <- data.frame(ID, rsn, group)
library(data.table)
setDT(data)
dcast(data[, .(rsn = unlist(str_split(rsn, ", "))), by = group], rsn ~ group, value.var = "rsn", fun.aggregate = length)
rsn group1 group2 group3
1: endo 1 1 1
2: male 0 2 1
3: other 1 0 1
4: unexplained 0 0 1
