I previously created a for loop to identify periods when water level decreased. This worked fine for smaller continuous time series data.
library(tidyverse)
library(lubridate)
level_data <- c(10:4, 20:9, 16:5, rep(0, 3))
times_stamp <- seq(ymd_hms('2015-07-22 12:15:00'), ymd_hms('2015-07-22 20:30:00'), by = '15 mins')
precip_data <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 5))
maxP.neg <- .1
# Create objects for holding the start and end dates. These lists should end up
# the same length, so that each start date has a corresponding end date.
startDates <- c()
endDates <- c()
recede <- 0 # This is a switch to keep track of whether a recession period is in progress
for (i in 2:length(level_data)) { # i.e. start at the second data point
diffQ <- level_data[i] - level_data[i - 1] # Calculate difference between current and previous timestamp
if (diffQ < 0 && # If difference is negative (i.e. receding) AND
recede == 0 && # a recession period has not already begun (recede == 0) AND
precip_data <= maxP.neg) { # min. dry period criteria is met ...
startDates <- append(startDates, times_stamp[i]) # Record the start time of the recession period
recede <- 1 # Change recede to 1 to indicate a recession period has begun
} else if (diffQ >= 0 && # If the difference becomes positive
recede == 1) { # and a recession period was in progress...
endDates <- append(endDates, times_stamp[i - 1]) # Record the previous timestamp as the end date of the recession
recede <- 0 # Set recede back to 0 to show the recession period has ended
} else { # Otherwise just continue to the next data point.
next
}
}
However, I want to perform the same analysis over a larger dataset where there are time breaks. I thought to split the data into a list of dataframes and use lapply and a custom function.
This is what I have come up with but I am not getting the same output as the for loop method?
diff_Q <- diff(level_data)
date_time1 <-
seq(ymd_hms('2015-07-22 12:15:00'),
ymd_hms('2015-07-22 20:15:00'),
by = '15 mins')
date_time2 <-
seq(ymd_hms('2015-07-25 08:00:00'),
ymd_hms('2015-07-25 16:00:00'),
by = '15 mins')
cum_precip <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 4))
df1 <-
data.frame(date_time1, diff_Q, cum_precip) %>% rename(date_time = date_time1)
df2 <-
data.frame(date_time2, diff_Q, cum_precip) %>% rename(date_time = date_time2)
recede_ls <- list(df1, df2)
startDates2 <- c()
endDates2 <- c()
RA.function <- function(x) {
recede <- 0
if (diff_Q < 0 &&
recede == 0 &&
cum_precip <= maxP.neg) {
startDates2 <- x$date_time
recede <- 1
} else if (diffQ >= 0 &&
recede == 1) {
endDates2 <- x$date_time[-1]
recede <-
0
} else {
next
}
}
lapply(recede_ls, RA.function)
Thanks for your help!
CodePudding user response:
library(tidyverse)
library(lubridate)
level_data <- c(10:4, 20:9, 16:5, rep(0, 3))
times_stamp <- seq(ymd_hms('2015-07-22 12:15:00'), ymd_hms('2015-07-22 20:30:00'), by = '15 mins')
precip_data <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 5))
df <- data.frame(date_time = times_stamp,
level_data = level_data,
cum_precip = precip_data)
dfs <- list(df, df)
RA.function <- function(x) {
startDates2 <- c()
endDates2 <- c()
recede <- 0
maxP.neg <- 0.1
for(i in 2:nrow(df)){
diffQ <- level_data[i] - level_data[i - 1]
if (diffQ < 0 && recede == 0 && x[i,"cum_precip"] <= maxP.neg) {
startDates2 <- c(startDates2, paste(x[i, "date_time"]))
recede <- 1
} else if (diffQ >= 0 && recede == 1) {
endDates2 <- c(endDates2, paste(x[i-1,"date_time"]))
recede <- 0
}
}
return(cbind(startDates2, endDates2))
}
lapply(dfs, RA.function)
#> [[1]]
#> startDates2 endDates2
#> [1,] "2015-07-22 12:30:00" "2015-07-22 13:45:00"
#> [2,] "2015-07-22 14:15:00" "2015-07-22 16:45:00"
#> [3,] "2015-07-22 17:15:00" "2015-07-22 20:00:00"
#>
#> [[2]]
#> startDates2 endDates2
#> [1,] "2015-07-22 12:30:00" "2015-07-22 13:45:00"
#> [2,] "2015-07-22 14:15:00" "2015-07-22 16:45:00"
#> [3,] "2015-07-22 17:15:00" "2015-07-22 20:00:00"
Created on 2022-01-06 by the reprex package (v2.0.1)
Is this what you are looking for? Let me know if you have any questions.
CodePudding user response:
Consider avoiding loops and use more set-based approaches such as merge with the data frame onto itself but one row back using a pseudo id based on nrow.
# BIND VECTORS INTO DATA FRAME
df <- data.frame(
level_data = c(10:4, 20:9, 16:5, rep(0, 3))
times_stamp = seq(
as.POSIXct('2015-07-22 12:15:00'),
as.POSIXct('2015-07-22 20:30:00'),
by = '15 mins'
)
precip_data = c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 5))
)
# ADD id COLUMN FOR MERGING
df <- transform(df, id = 1:nrow(df))
# SELF-MERGE
df <- merge(
df,
transform(df, id = id - 1),
by = "id",
all.x = TRUE,
suffix = c("_prev", "_curr")
)
# ASSIGN COLUMNS FOR LEVEL DIFFERENCES AND RECEDE GROUPS
df <- within(
df, {
diffQ <- level_data_curr - level_data_prev # Calculate difference
recede_grp <- cumsum(diffQ >= 0) # Create recede groupings
recede_grp <- ifelse(diffQ >= 0, recede_grp-1, recede_grp)
}
)
Then, extract needed values which can involve an apply method for groupwise aggregation, tapply. (Last values may be ignored given the zero-zero end cut-off comparison.)
startDates_new <- as.POSIXct(
tapply(df$times_stamp_curr, df$recede_grp, min),
origin="1970-01-01"
)
endDates_new <- as.POSIXct(tapply(
df$times_stamp_prev, df$recede_grp, max),
origin="1970-01-01"
)
startDates_new
# 0 1 2 3
# "2015-07-22 12:30:00 CDT" "2015-07-22 14:15:00 CDT" "2015-07-22 17:15:00 CDT" "2015-07-22 20:30:00 CDT"
endDates_new
# 0 1 2 3
# "2015-07-22 13:45:00 CDT" "2015-07-22 16:45:00 CDT" "2015-07-22 20:00:00 CDT" "2015-07-22 20:15:00 CDT"
