Home > Back-end >  How to convert list of lists into a dataframe while keeping track of list "number" in R?
How to convert list of lists into a dataframe while keeping track of list "number" in R?

Time:01-21

I have a list of lists (data frames) stored in df_list (sample code below). I want to convert it into a single dataframe, adding a counter column count that records which list element each row came from.

I want the end product to look something like this:

count   replicate level high.density low.density
1          1   low           54          36
1          1   low           54          31
1          2   low           11          28
1          2   low           11          45
1          1   mid           24          10
1          2   mid           12          24
1          2   mid           12          17
1          2    up           40           2
2          1   low           54          31
2          1   low           54          31
2          2   low           11          45
2          2   low           11          28
2          1   mid           24          10
2          2   mid           12          24
2          2    up           20           2
......
1000       2    up           40           5

#List of Lists code

# Example data: 11 observations of two density measurements, keyed by
# `replicate` and `level`. The tibble classes and the `spec` attribute are
# kept exactly as given (this looks like dput() output of a readr import).
df <- structure(
  list(
    replicate = c(1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2),
    level = c(
      "low", "low", "mid", "mid", "low", "low", "mid", "mid", "up", "up", "up"
    ),
    high.density = c(14, 54, 82, 24, 12, 11, 12, NA, 40, NA, 20),
    low.density = c(36, 31, 10, NA, 28, 45, 17, 24, 10, 5, 2)
  ),
  class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -11L),
  spec = structure(
    list(
      cols = list(
        replicate = structure(
          list(),
          class = c("collector_double", "collector")
        ),
        level = structure(
          list(),
          class = c("collector_character", "collector")
        ),
        high.density = structure(
          list(),
          class = c("collector_double", "collector")
        ),
        low.density = structure(
          list(),
          class = c("collector_double", "collector")
        )
      ),
      default = structure(list(), class = c("collector_guess", "collector")),
      skip = 1L
    ),
    class = "col_spec"
  )
)

# Both grouping keys become factors; `replicate` is already numeric and
# `level` is already character, so no intermediate coercion is needed.
df$replicate <- factor(df$replicate)
df$level <- factor(df$level)
                      
#' Resample the density columns within each replicate/level group
#'
#' Splits `DF` by the combination of its `replicate` and `level` columns and,
#' inside every group, draws `high.density` and `low.density` with replacement.
#' The two columns are sampled independently of each other; `replicate` and
#' `level` are carried over unchanged.
#'
#' @param DF A data frame (or tibble) with columns `replicate`, `level`,
#'   `high.density` and `low.density`.
#' @return A data frame with columns `replicate`, `level`, `high.density` and
#'   `low.density`, rows grouped by replicate/level combination, and default
#'   row names.
df_shuffle <- function(DF) {
  # The grouping terms must be joined with `+`; without it the formula does
  # not parse. Formula support in split() requires R >= 4.1, which the `\(x)`
  # lambda below already assumes.
  groups <- split(DF, f = ~ replicate + level)
  resampled <- lapply(groups, \(grp) {
    n_rows <- nrow(grp)
    # Separate draws, high.density first, so the two density columns are
    # shuffled independently (and the RNG stream is consumed in a fixed order).
    high_idx <- sample(seq_len(n_rows), replace = TRUE)
    low_idx <- sample(seq_len(n_rows), replace = TRUE)
    # `[[` yields a plain vector for both tibbles and base data frames.
    cbind(
      grp[, c("replicate", "level")],
      high.density = grp[["high.density"]][high_idx],
      low.density = grp[["low.density"]][low_idx]
    )
  })
  DF_new <- do.call(rbind, resampled)
  # rbind() prefixes row names with the group label; reset to 1..n.
  rownames(DF_new) <- NULL
  DF_new
}

# Number of resampled copies to generate.
B <- 1000
# Build an unnamed list of B independently shuffled versions of `df`.
df_list <- lapply(seq_len(B), \(i) df_shuffle(df))

CodePudding user response:

We can use purrr::imap and dplyr::bind_rows for this.

In imap we add a new column to each data.frame .x with mutate. The new column is called count, and .y is the index of each list element. We use mutate's .before argument to make this column the first column of each data.frame. The result of the call to imap is a list of data.frames, which we then combine into one large data.frame with dplyr::bind_rows.

library(tidyverse)

# Tag every data frame with its position in the list (`idx`) as a leading
# `count` column, then stack all of them into one data frame.
df_list %>%
  imap(\(x, idx) mutate(x, count = idx, .before = "replicate")) %>%
  bind_rows()

#>       count replicate level high.density low.density
#> 1         1         1   low           14          31
#> 2         1         1   low           14          36
#> 3         1         2   low           11          45
#> 4         1         2   low           12          28
#> 5         1         1   mid           24          10
#> 6         1         1   mid           24          10
#> 7         1         2   mid           12          17
#> 8         1         2   mid           NA          17
#> 9         1         2    up           40           5
#> 10        1         2    up           20           5
#> 11        1         2    up           20           5
#> 12        2         1   low           14          31
#> 13        2         1   low           54          31
#> ...

Created on 2022-01-20 by the reprex package (v2.0.1)

CodePudding user response:

Or, we can simply use the data.table::rbindlist() function:

library(data.table)

# Stack the list; `idcol` adds a `count` column holding each element's index.
rbindlist(l = df_list, idcol = "count")

#        count replicate level high.density low.density
#     1:     1         1   low           14          31
#     2:     1         1   low           14          31
#     3:     1         2   low           12          45
#     4:     1         2   low           12          45
#     5:     1         1   mid           24          NA
#    ---                                               
# 10996:  1000         2   mid           NA          17
# 10997:  1000         2   mid           NA          17
# 10998:  1000         2    up           20          10
# 10999:  1000         2    up           20           5
# 11000:  1000         2    up           40           2
  •  Tags:  
  • Related