I have the following data.table:
| Date | Segment | Variable | value |
|---|---|---|---|
| 31-12-2021 | XXX | aaa_1 | 10 |
| 31-12-2021 | XXX | bbb_1 | 11 |
| 31-12-2021 | XXX | ccc_1 | 12 |
| 31-01-2022 | XXX | aaa_2 | 13 |
| 31-01-2022 | XXX | bbb_2 | 14 |
| 31-01-2022 | XXX | ccc_2 | 15 |
| 31-12-2021 | YYY | aaa_1 | 16 |
| 31-12-2021 | YYY | bbb_1 | 17 |
| 31-12-2021 | YYY | ccc_1 | 18 |
| 31-01-2022 | YYY | aaa_2 | 19 |
| 31-01-2022 | YYY | bbb_2 | 20 |
| 31-01-2022 | YYY | ccc_2 | 21 |
And the desired output would be:
| Date | Segment | aaa | bbb | ccc |
|---|---|---|---|---|
| 31-12-2021 | XXX | 10 | 11 | 12 |
| 31-01-2022 | XXX | 13 | 14 | 15 |
| 31-12-2021 | YYY | 16 | 17 | 18 |
| 31-01-2022 | YYY | 19 | 20 | 21 |
Any idea? I'm blocked at this point.
CodePudding user response:
We can separate the 'Variable' column into 'two' and then use pivot_wider to reshape to 'wide' format
library(dplyr)
library(tidyr)
df1 %>%
separate(Variable, into = c('Var1', 'Var2')) %>%
pivot_wider(names_from = Var1, values_from = value) %>%
select(-Var2)
-output
# A tibble: 4 × 5
Date Segment aaa bbb ccc
<chr> <chr> <int> <int> <int>
1 31-12-2021 XXX 10 11 12
2 31-01-2022 XXX 13 14 15
3 31-12-2021 YYY 16 17 18
4 31-01-2022 YYY 19 20 21
Or use dcast
library(data.table)
dcast(setDT(df1), Date Segment ~ trimws(Variable, whitespace = "_.*"))
Date Segment aaa bbb ccc
1: 31-01-2022 XXX 13 14 15
2: 31-01-2022 YYY 19 20 21
3: 31-12-2021 XXX 10 11 12
4: 31-12-2021 YYY 16 17 18
data
df1 <- structure(list(Date = c("31-12-2021", "31-12-2021", "31-12-2021",
"31-01-2022", "31-01-2022", "31-01-2022", "31-12-2021", "31-12-2021",
"31-12-2021", "31-01-2022", "31-01-2022", "31-01-2022"), Segment = c("XXX",
"XXX", "XXX", "XXX", "XXX", "XXX", "YYY", "YYY", "YYY", "YYY",
"YYY", "YYY"), Variable = c("aaa_1", "bbb_1", "ccc_1", "aaa_2",
"bbb_2", "ccc_2", "aaa_1", "bbb_1", "ccc_1", "aaa_2", "bbb_2",
"ccc_2"), value = 10:21), class = "data.frame", row.names = c(NA,
-12L))
CodePudding user response:
This is just reshaping. Since you mentioned data.table:
library(data.table)
dcast(Date Segment ~ Variable, value.var = "value",
data = DT[, Variable := gsub("_[0-9] $", "", Variable])
# Date Segment aaa bbb ccc
# 1: 31-01-2022 XXX 13 14 15
# 2: 31-01-2022 YYY 19 20 21
# 3: 31-12-2021 XXX 10 11 12
# 4: 31-12-2021 YYY 16 17 18
Data
DT <- setDT(structure(list(Date = c("31-12-2021", "31-12-2021", "31-12-2021", "31-01-2022", "31-01-2022", "31-01-2022", "31-12-2021", "31-12-2021", "31-12-2021", "31-01-2022", "31-01-2022", "31-01-2022"), Segment = c("XXX", "XXX", "XXX", "XXX", "XXX", "XXX", "YYY", "YYY", "YYY", "YYY", "YYY", "YYY"), Variable = c("aaa_1", "bbb_1", "ccc_1", "aaa_2", "bbb_2", "ccc_2", "aaa_1", "bbb_1", "ccc_1", "aaa_2", "bbb_2", "ccc_2"), value = 10:21), class = c("data.table", "data.frame"), row.names = c(NA, -12L)))
