I have a CSV data in which I want to merge every two rows in each of the columns. The code below works fine on other CSVs. However, it returns an error while mutating with this particular dataset.
How can I fix this? The purpose is to merge every two rows in each of the columns in this dataset.
Error:
Error in mutate():
! Problem while computing id = rep(1:(n()/2), each = 2).
x id must be size 179 or 1, not 178.
Code:
df = df%>% mutate(id = rep(1:(n()/2), each = 2)) %>%
group_by(id) %>%
summarize(across(date_received:permit,
~trimws(paste0(.x, collapse = " "))))
Data:
structure(list(date_received = c("Existing/Pool", "1/26/2021",
"Existing ", "2/3/2021", "Existing", "2/10/2021", "Abandonment",
"2/11/2021", "Holding Tank", "2/11/2021", "Existing/Additon",
"2/19/2021", "Abandonment", "2/23/2021", "Existing ", "3/2/2021",
"Existing", "3/2/2021", "Existing", "3/2/2021", "Existing/Pool",
"3/3/2021", "Abandonment", "2/11/2021", "Abandonment", "3/8/2021",
"Existing", "3/10/2021", "Abandonment", "3/10/2021", "Abandonment",
"3/11/2021", "Abandonment", "3/25/2021", "Abandonment", "3/30/2021",
"Abandonment", "4/6/2021", "Abandonment", "4/2/2021", "Abandonment",
"4/7/2021", "Abandonment", "4/16/2021", "Holding Tank", "4/19/2021",
"Existing/Pool", "4/21/2021", "Holding Tank", "4/26/2021", "Abandonment",
"4/27/2021", "Existing Addition", "4/1/2020", "Portables", "4/30/2021",
"Existing ", "5/17/2021", "Existing", "5/27/2021", "Existing",
"5/27/2021", "Abandonment ", "6/2/2021", "Existing", "6/4/2021",
"Abandonment", "6/9/2021", "Portables", "6/14/2021", "Pool Addition",
"6/28/2021", "Existing Addition", "6/28/20201", "Abandonment",
"6/29/2021", "Holding Tank", "6/21/2021", "Abandonment", "7/2/2021",
"Pool Addition", "7/6/2021", "Holding Tank", "7/9/2021", "Abandonment",
"7/15/2021", "Abandonment", "7/16/2021", "Holding Tank", "7/27/2021",
"Abandonment", "7/28/2021", "Holding Tank", "7/28/2021", "Existing",
"8/3/2021", "Abandonment", "8/3/2021", "Existing", "8/17/2021",
"Abandonment", "9/8/2021", "Abandonment", "9/14/2021", "Existing",
"8/19/2021", "Existing ", "9/20/2021", "Restroom", "9/20/2021",
"Holding Tank", "9/22/2021", "Existing", "10/7/2021", "Holding Tank",
"10/5/2021", "Existing", "10/8/2021", "Existing", "10/13/2021",
"Holding Tank", "10/15/2021", "Abandonment", "10/18/2021", "Abandonment",
"10/19/2021", "Holding Tank", "10/22/2021", "Abandonment", "10/25/2021",
"Holding Tank", "10/25/2021", "Restroom", "10/27/2021", "Abandonment",
"11/1/2021", "Restroom", "11/4/2021", "Abandonment", "11/4/2021",
"Existing", "6/14/2021", "Existing", "11/9/2021", "Holding Tank",
"11/10/2021", "Abandonment", "11/18/2021", "Abandonment", "11/18/2021",
"Abandonment", "11/18/2021", "Abandonment", "11/18/2021", "Existing",
"11/22/2021", "Abandonment ", "11/30/2021", "Abandonment ", "Abandonment ",
"11/30/2021", "Abandonment ", "11/30/2021", "Abandonment ", "11/30/2021",
"Abandonment ", "11/30/2021", "Abandonment ", "11/30/2021", "Abandonment ",
"11/17/2021", "Abandonment", "12/3/2021", "Abandonment", "12/3/2021",
"Abandonment", "12/13/2021", "Abandonment", "12/14/2021"), date_approved = c("HC",
"1/27/2021", "CE", "2/3/2021", "NC", "2/10/2021", "VV", "2/11/2021",
"SB", "2/18/2021", "VV", "2/19/2021", "HC", "2/23/2021", "CE",
"3/2/2021", "NC", "3/2/2021", "NC", "3/2/2021", "HC", "3/3/2021",
"", "2/19/2021", "", "3/8/2021", "NC", "3/10/2021", "NC", "3/10/2021",
"HC", "3/11/2021", "HC", "3/25/2021", "NC", "3/30/2021", "SB",
"4/6/2021", "NC", "4/2/2021", "NC", "4/7/2021", "HC", "4/19/2021",
"NC", "4/20/2021", "VV", "4/21/2021", "VV", "4/26/2021", "NC",
"4/27/2021", "SB", "4/28/2021", "CE", "5/3/2021", "", "5/17/2021",
"VV", "5/27/2021", "VV", "5/27/2021", "", "6/3/2021", "", "6/4/2021",
"", "6/9/2021", "NC", "6/14/2021", "SB", "6/28/2021", "", "6/28/2021",
"", "6/29/2021", "", "7/1/2021", "VV", "7/6/2021", "SB", "7/6/2021",
"VV", "7/9/2021", "SB", "7/15/2021", "HC", "7/16/2021", "NC",
"7/27/2021", "HC", "7/28/2021", "HC", "7/28/2021", "", "8/3/2021",
"", "8/4/2021", "", "8/17/2021", "SB", "9/8/2021", "NC", "9/14/2021",
"", "9/14/2021", "NC", "9/20/2021", "CE", "9/20/2021", "NC",
"9/22/2021", "", "10/7/2021", "", "10/7/2021", "", "10/8/2021",
"SB", "10/13/2021", "", "10/15/2021", "", "10/19/2021", "HC",
"10/19/2021", "HC", "10/22/2021", "", "10/25/2021", "", "10/25/2021",
"", "10/28/2021", "", "11/1/2021", "", "11/4/2021", "", "11/5/2021",
"", "11/9/2021", "", "11/9/2021", "", "", "", "11/18/2021", "",
"11/18/2021", "", "11/18/2021", "", "11/18/2021", "", "11/22/2021",
"", "11/30/2021", "", "", "11/30/2021", "", "11/30/2021", "",
"11/30/2021", "", "11/30/2021", "", "11/30/2021", "", "11/17/2021",
"", "12/3/2021", "", "12/3/2021", "", "12/3/2021", "", "12/15/2021"
), ap = c("", "AP-21-001", "", "AP-21-002", "", "AP-21-003",
"", "AP-21-004", "", "AP-21-005", "", "AP-21-006", "", "AP-21-007",
"", "AP-21-008", "", "AP-21-009", "", "AP-21-010", "", "AP-21-011",
"", "AP-21-012", "", "AP-21-013", "", "AP-21-014", "", "AP-21-015",
"", "AP-21-016", "", "AP-21-017", "", "AP-21-018", "", "AP-21-019",
"", "AP-21-020", "", "AP-21-021", "", "AP-21-022", "", "AP-21-023",
"", "AP-21-024", "", "AP-21-025", "", "AP-21-026", "", "AP-21-027",
"", "AP-21-028", "", "AP-21-029", "", "AP-21-030", "", "AP-21-031",
"", "AP-21-032", "", "AP-21-033", "", "AP-21-034", "", "AP-21-034",
"", "AP-21-035", "", "AP-21-036", "", "AP-21-037", "", "AP-21-038",
"", "AP-21-039", "", "AP-21-040", "", "AP-21-041", "", "AP-21-042",
"", "AP-21-043", "", "AP-21-044", "", "AP-21-045", "", "AP-21-046",
"", "AP-21-047", "", "AP-21-048", "", "AP-21-049", "", "AP-21-050",
"", "AP-21-051", "", "AP-21-052", "", "AP-21-053", "", "AP-21-054",
"", "AP-21-055", "", "AP-21-056", "", "AP-21-057", "", "AP-21-058",
"", "AP-21-059", "", "AP-21-060", "", "AP-21-061", "", "AP-21-062",
"", "AP-21-063", "", "AP-21-064", "", "AP-21-065", "", "AP-21-066",
"", "AP-21-067", "", "AP-21-068", "", "AP-21-069", "", "AP-21-070",
"", "AP-21-071", "", "AP-21-072", "", "AP-21-073", "", "AP-21-074",
"", "AP-21-075", "", "AP-21-076", "", "AP-21-077", "", "AP-21-078",
"", "", "AP-21-080", "", "AP-21-081", "", "AP-21-082", "", "AP-21-083",
"", "AP-21-084", "", "AP-21-085", "", "AP-21-086", "", "AP-21-087",
"", "AP-21-088", "", "AP-21-089"), permit = c("", "06-SE-2224986",
"", "06-SE-2227597", "", "06-SM-2239687", "", "06-", "", "06-SM-2241767",
"", "06-SE-2243142", "", "06-SE-2244287", "", "06-SE-2246427",
"", "06-SM-2246414", "", "06-SM-2246400", "", "06-SE-2247078",
"", "06-SM-2242491", "", "06-SM-2248390", "", "06-SM-2249588",
"", "06-SM-2249687", "", "06-SE-2250115", "", "06-SE-2254824",
"", "06-SM-2255976", "", "06-SM-2269208", "", "06-SM-2267853",
"", "06-SM-2269667", "", "06-SM-2273781", "", "06-SM-2273781",
"", "06-SE-2274109", "", "06-SE-2275335", "", "06-SM-2276119",
"", "06-SM-2045833", "", "06-SE-2278410", "", "06-SM-2292821",
"", "06-SE-2296438", "", "06-SE-", "", "06-SM-2308253", "", "06-SE-2308815",
"", "06-SM-2310625", "", "06-SM-2310987", "", "06-SM-2316223",
"", "", "", "06-SE-2316719", "", "06-SM-2327886", "", "06-SE-2328314",
"", "06-SM-2328437", "", "06-SE-2329714", "", "06-SM-2331651",
"", "06-SE-2332160", "", "06-SM-2335477", "", "06-SE-2335855",
"", "06-SE-2335957", "", "06-SE-2337766", "", "06-SE-2338114",
"", "06-SE-2351769", "", "06-SM-2368036", "", "06-SM-2369869",
"", "06-SE-23651769", "", "06-SM-2371532", "", "06-SE-2371710",
"", "06-SM-237238", "", "06-SM-2407385", "", "06-SM-2407543",
"", "06-SM-2407855", "", "06-SM-2409931", "", "06-SE-2410182",
"", "06-SE-2411071", "", "06-SE-2411268", "", "06-SE-2412725",
"", "06-SE-2413035", "", "06-SE-2413090", "", "06-SE-2413883",
"", "06-SM-2415394", "", "06-SE-2416802", "", "06-SE-2417359",
"", "06-SM-2408057", "", "06-SM-2418381", "", "06-SE-2418776",
"", "06-SM-2421477", "", "06-SM-2421473", "", "06-SM-", "", "06-SM-",
"", "06-SM-2410994", "Pool", "AP1766846", "Classroom E", "Classroom F",
"AP1766856", "Classroom C", "AP1766858", "Classroom A", "AP1766862",
"Landscape Area", "AP1766864", "Classroom B", "AO1766867", "",
"06-SE-2420566", "", "06-E-2425187", "", "", "", "06-SM-2424110",
"", "06-SE-2428789")), class = "data.frame", row.names = c(NA,
-179L))
CodePudding user response:
To assign the 'id', you can increase the count of rows by 1, replicate each number by twice and then select the first n.
df%>% mutate(id = rep(1:((n() 1)/2), each = 2)[1:n()]) %>% group_by(id) %>%
summarize(across(date_received:permit,
~trimws(paste0(.x, collapse = " "))))
Result:
# A tibble: 90 x 5
id date_received date_approved ap permit
<int> <chr> <chr> <chr> <chr>
1 1 Existing/Pool 1/26/2021 HC 1/27/2021 AP-21-001 06-SE-2224986
2 2 Existing 2/3/2021 CE 2/3/2021 AP-21-002 06-SE-2227597
3 3 Existing 2/10/2021 NC 2/10/2021 AP-21-003 06-SM-2239687
4 4 Abandonment 2/11/2021 VV 2/11/2021 AP-21-004 06-
5 5 Holding Tank 2/11/2021 SB 2/18/2021 AP-21-005 06-SM-2241767
6 6 Existing/Additon 2/19/2021 VV 2/19/2021 AP-21-006 06-SE-2243142
7 7 Abandonment 2/23/2021 HC 2/23/2021 AP-21-007 06-SE-2244287
8 8 Existing 3/2/2021 CE 3/2/2021 AP-21-008 06-SE-2246427
9 9 Existing 3/2/2021 NC 3/2/2021 AP-21-009 06-SM-2246414
10 10 Existing 3/2/2021 NC 3/2/2021 AP-21-010 06-SM-2246400
# ... with 80 more rows
CodePudding user response:
Use group_by(id = (1:n() 1) %/% 2):
library(dplyr)
df %>%
group_by(id = (1:n() 1) %/% 2) %>%
summarize(across(date_received:permit,
~ trimws(paste0(.x, collapse = " "))))
# # A tibble: 90 × 5
# id date_received date_approved ap permit
# <dbl> <chr> <chr> <chr> <chr>
# 1 1 Existing/Pool 1/26/2021 HC 1/27/2021 AP-21-001 06-SE-2224986
# 2 2 Existing 2/3/2021 CE 2/3/2021 AP-21-002 06-SE-2227597
# 3 3 Existing 2/10/2021 NC 2/10/2021 AP-21-003 06-SM-2239687
# 4 4 Abandonment 2/11/2021 VV 2/11/2021 AP-21-004 06-
# 5 5 Holding Tank 2/11/2021 SB 2/18/2021 AP-21-005 06-SM-2241767
# 6 6 Existing/Additon 2/19/2021 VV 2/19/2021 AP-21-006 06-SE-2243142
# 7 7 Abandonment 2/23/2021 HC 2/23/2021 AP-21-007 06-SE-2244287
# 8 8 Existing 3/2/2021 CE 3/2/2021 AP-21-008 06-SE-2246427
# 9 9 Existing 3/2/2021 NC 3/2/2021 AP-21-009 06-SM-2246414
# 10 10 Existing 3/2/2021 NC 3/2/2021 AP-21-010 06-SM-2246400
# # … with 80 more rows
%/% indicates integer division. If n() is 15, the grouping variable will gives
(1:15 1) %/% 2
# [1] 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8
