I have a list of lists (data frames) stored in df_list (sample code below). I want to combine them into a single data frame, adding a counter column named count that records which list element each row came from.
I want the end product to look something like this:
count replicate level high.density low.density
1 1 low 54 36
1 1 low 54 31
1 2 low 11 28
1 2 low 11 45
1 1 mid 24 10
1 2 mid 12 24
1 2 mid 12 17
1 2 up 40 2
2 1 low 54 31
2 1 low 54 31
2 2 low 11 45
2 2 low 11 28
2 1 mid 24 10
2 2 mid 12 24
2 2 up 20 2
......
1000 2 up 40 5
# Code that builds the list of data frames (df_list).
# Example input: 11 rows with columns replicate, level, high.density and
# low.density; both density columns contain some NA values.
# NOTE(review): this looks like dput() output of a tibble read with readr
# (classes "spec_tbl_df"/"tbl_df" plus a "spec" attribute) -- confirm.
df <- structure(list(replicate = c(1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2), level = c("low", "low", "mid", "mid", "low", "low", "mid", "mid", "up", "up", "up"), high.density = c(14, 54, 82, 24, 12, 11, 12, NA, 40, NA, 20), low.density = c(36, 31, 10,
NA, 28, 45, 17, 24, 10, 5, 2)), class = c("spec_tbl_df","tbl_df","tbl", "data.frame"), row.names = c(NA, -11L), spec = structure(list(cols = list(replicate = structure(list(), class = c("collector_double", "collector")), level = structure(list(), class = c("collector_character","collector")), high.density = structure(list(), class = c("collector_double","collector")), low.density = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess", "collector")), skip = 1L), class = "col_spec"))
# Turn the two grouping columns into factors so they can be used to split df.
df[["replicate"]] <- df[["replicate"]] |> as.numeric() |> as.factor()
df[["level"]] <- df[["level"]] |> as.character() |> as.factor()
#' Resample the density columns within each replicate/level group
#'
#' Splits `DF` by every combination of `replicate` and `level`. Within each
#' group, `high.density` and `low.density` are each redrawn with replacement
#' from that group's own values. The two columns are sampled independently,
#' so values that shared a row in `DF` need not share a row in the result.
#'
#' @param DF A data frame with columns `replicate`, `level`, `high.density`
#'   and `low.density`.
#' @return A data frame with those four columns and default row names. Each
#'   group keeps its original number of rows; groups are stacked in the order
#'   produced by `split()`.
df_shuffle <- function(DF) {
  # The grouping terms must be joined with `+`; `~ a b` does not parse.
  groups <- split(DF, f = ~ replicate + level)
  shuffled <- lapply(groups, \(x) {
    n_rows <- nrow(x)
    # seq_len() keeps sampling safe for empty and single-row groups.
    # high.density is drawn before low.density.
    high_idx <- sample(seq_len(n_rows), replace = TRUE)
    low_idx <- sample(seq_len(n_rows), replace = TRUE)
    cbind(
      x[, c("replicate", "level")],
      high.density = x[high_idx, "high.density"],
      low.density = x[low_idx, "low.density"]
    )
  })
  DF_new <- do.call(rbind, shuffled)
  # rbind() builds row names from the group names; reset them to 1..n.
  rownames(DF_new) <- NULL
  DF_new
}
# Number of resampled data frames to generate.
B <- 1000
# Call df_shuffle(df) B times, collecting the results in an unnamed list.
df_list <- lapply(seq_len(B), \(i) df_shuffle(df))
CodePudding user response:
We can use purrr::imap and dplyr::bind_rows for this.
Inside imap we use mutate to add a new column, count, to each data.frame .x; .y is the index of the current list element. mutate's .before argument makes count the first column of each data.frame. The call to imap returns a list of data.frames, which dplyr::bind_rows then combines into one large data.frame.
library(tidyverse)
# Add a leading `count` column holding each element's index in df_list,
# then stack all the data frames into one.
df_list |>
  imap(\(tbl, idx) mutate(tbl, count = idx, .before = "replicate")) |>
  bind_rows()
#> count replicate level high.density low.density
#> 1 1 1 low 14 31
#> 2 1 1 low 14 36
#> 3 1 2 low 11 45
#> 4 1 2 low 12 28
#> 5 1 1 mid 24 10
#> 6 1 1 mid 24 10
#> 7 1 2 mid 12 17
#> 8 1 2 mid NA 17
#> 9 1 2 up 40 5
#> 10 1 2 up 20 5
#> 11 1 2 up 20 5
#> 12 2 1 low 14 31
#> 13 2 1 low 54 31
#> ...
Created on 2022-01-20 by the reprex package (v2.0.1)
CodePudding user response:
Or we can simply use the data.table::rbindlist() function, whose idcol argument adds a column identifying the list element each row came from:
library(data.table)
# Stack every element of df_list; idcol adds a "count" column identifying
# the list element each row came from.
rbindlist(l = df_list, idcol = "count")
# count replicate level high.density low.density
# 1: 1 1 low 14 31
# 2: 1 1 low 14 31
# 3: 1 2 low 12 45
# 4: 1 2 low 12 45
# 5: 1 1 mid 24 NA
# ---
# 10996: 1000 2 mid NA 17
# 10997: 1000 2 mid NA 17
# 10998: 1000 2 up 20 10
# 10999: 1000 2 up 20 5
# 11000: 1000 2 up 40 2
