I have this data frame:
# A tibble: 8 x 7
Date_Time_GMT_3 LoggerID_SiteCode value month year day time
<dttm> <chr> <dbl> <chr> <dbl> <int> <Period>
1 2021-08-01 12:00:00 X20819740_X3WR_S_Statio~ 19.9 Aug 2021 1 12H 0M 0S
2 2021-08-01 12:00:00 X21092860_X3WR_U_Compare 19.3 Aug 2021 1 12H 0M 0S
3 2021-08-01 12:00:00 X20676906_X1WR_S_Statio~ 18.8 Aug 2021 1 1H 45M 0S
4 2021-08-01 12:00:00 X21092863_X1WR_U_Compare NA Aug 2021 1 1H 45M 0S
5 2021-08-01 12:00:00 X20817726_14WR_S_Statio~ 19.9 Aug 2021 1 2H 15M 0S
6 2021-08-01 12:00:00 X21092877_14WR_U_Compare 19.3 Aug 2021 1 2H 15M 0S
7 2021-08-01 12:00:00 X20819740_X3WR_S_Statio~ 17.2 Sept 2021 1 1H 45M 0S
8 2021-08-01 12:00:00 X21092860_X3WR_U_Compare 17.0 Sept 2021 1 1H 45M 0S
My personal data frame is quite a bit bigger, the code above only shows the first couple rows.
I am trying to calculate the difference in values for LoggerID_SiteCode strings that contain the same SiteCode, for example when they both contain X3WR and therefore calculating the difference between X20819740_X3WR_S_Stationary and X21092860_X3WR_U_Compare. But it's important that when I calculate the difference between these 2 values, they are values that are occurring at the same date/time. Does anyone know how to do this?
Data
df = structure(list(Date_Time_GMT_3 = structure
(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0),
min = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L), mday = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), mon = c(7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L), year = c(121L, 121L, 121L, 121L, 121L, 121, 121, 121),
wday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), yday = c(212L, 212L, 212L,
212L, 212L, 212L, 212L, 212L), isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), zone = c("EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST"), gmtoff = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_)), tzone = "EST", class = c("POSIXlt",
"POSIXt")), LoggerID_SiteCode = c("X20819740_X3WR_S_Stationary",
"X21092860_X3WR_U_Compare", "X20676906_X1WR_S_Stationary", "X21092863_X1WR_U_Compare",
"X20817726_14WR_S_Stationary", "X21092877_14WR_U_Compare",
"X20819740_X3WR_S_Stationary", "X21092860_X3WR_U_Compare"), value = c(19.948,
19.321, 18.806, NA, 19.948, 19.300, 17.201, 17.012), month = c("Aug", "Aug", "Aug", "Aug",
"Aug", "Aug", "Sept", "Sept"), year = c(2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021),
day = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), time = new("Period", .Data = c(0,
0, 0, 0, 0, 0, 0, 0),
year = c(0, 0, 0, 0, 0, 0, 0, 0), month = c(0,
0, 0, 0, 0, 0, 0, 0), day = c(0, 0, 0, 0, 0, 0, 0, 0), hour = c(12, 12,
1, 1, 2, 2, 1, 1), minute = c(0, 0, 45, 45, 15, 15, 45, 45))), row.names = c(NA,
-8L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
You can use group_by and substr. If you don't want NA to appear, add na.rm = T to the diff function.
df %>%
group_by(month, year, day, time,
SiteCode = substr(LoggerID_SiteCode, 11, 14)) %>%
summarise(diff = diff(value))
# A tibble: 4 x 6
# Groups: month, year, day, time [4]
month year day time SiteCode diff
<chr> <dbl> <int> <Period> <chr> <dbl>
1 Aug 2021 1 1H 45M 0S X1WR NA
2 Aug 2021 1 2H 15M 0S 14WR -0.648
3 Aug 2021 1 12H 0M 0S X3WR -0.627
4 Sept 2021 1 1H 45M 0S X3WR -0.189
