How to filter rows that occur sequentially that fit multiple conditions using dplyr-CodePudding

I am trying to filter my data for certain conditions using dplyr. The conditions need to be applied to rows that occur sequentially. I have a condition that would apply to the first row (not the first row of the df) and then I am interested to see if there if the following row (the second row) meets another set of conditions. If the conditions are met for the 1st and 2nd row then I want to be able to see the 1st, 2nd, 3rd, and 4th row

Here are the conditions that I want to filter for

The Close of Row1 is greater than or equal to the Open of Row1
The Close of Row2 is greater than or equal to the Open of Row2
The Close of Row2 is less than or equal to the High of Row1

Here is an example of my data.

structure(list(Date = c("01/14/2022", "01/14/2022", "01/14/2022", 
"01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", 
"01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", 
"01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", 
"01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", 
"01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", 
"01/14/2022", "01/14/2022", "01/14/2022", "01/18/2022", "01/18/2022", 
"01/18/2022", "01/18/2022", "01/18/2022", "01/18/2022"), Time = c("08:05", 
"08:10", "08:15", "08:20", "08:25", "08:30", "08:35", "08:40", 
"08:45", "08:50", "08:55", "09:00", "09:05", "09:10", "09:15", 
"09:20", "09:25", "09:30", "09:35", "09:40", "09:45", "09:50", 
"09:55", "10:00", "10:05", "10:10", "10:15", "10:20", "10:25", 
"10:30", "10:35", "09:00", "09:05", "09:10", "09:15", "09:20", 
"09:25"), Open = c(4618.75, 4621.25, 4621, 4617, 4622, 4624.75, 
4623.75, 4620.75, 4617.25, 4613.75, 4612, 4610, 4613.25, 4614, 
4617.75, 4619, 4619.75, 4619.5, 4619.25, 4618.25, 4634.75, 4635.75, 
4635.25, 4643.25, 4650.75, 4640.75, 4646, 4641.25, 4654.5, 4639.5, 
4638, 4610.5, 4611.5, 4612, 4611.75, 4610, 4605.75), High = c(4621.75, 
4623.75, 4623.25, 4625.5, 4625, 4625, 4625, 4621.75, 4620.25, 
4617.75, 4612.5, 4614, 4614.5, 4619.75, 4621.25, 4623, 4621.5, 
4622.5, 4624.25, 4638.75, 4640.5, 4645.75, 4644.25, 4652.5, 4653.5, 
4649.5, 4651.75, 4655.75, 4655, 4642.75, 4640, 4612.25, 4612.75, 
4612.5, 4612.5, 4610.5, 4608.75), Low = c(4612.75, 4617.5, 4616, 
4617, 4620.5, 4620.5, 4616.75, 4616, 4611.75, 4610.25, 4606.75, 
4607, 4609.5, 4614, 4616.5, 4616, 4616.25, 4617.75, 4614.5, 4615.25, 
4629.25, 4633.5, 4633.25, 4642.5, 4635.75, 4640.5, 4639.5, 4641, 
4638.75, 4633.75, 4631.5, 4609.5, 4609.75, 4609.25, 4608, 4604.5, 
4604.75), Close = c(4621.25, 4620.75, 4616.75, 4622, 4624.5, 
4623.75, 4620.75, 4617, 4613.5, 4612, 4609.75, 4613.25, 4614, 
4617.5, 4619, 4619.75, 4619.5, 4619.25, 4618, 4635, 4635.5, 4635.25, 
4643.5, 4651, 4641, 4646.25, 4641.5, 4654.75, 4639.5, 4637.75, 
4639.5, 4611.5, 4612, 4611.5, 4609.5, 4605.75, 4608.75), Up = c(6712L, 
3316L, 2396L, 3218L, 2246L, 2817L, 5079L, 3495L, 4783L, 4404L, 
5390L, 5139L, 2908L, 3943L, 4140L, 4026L, 3068L, 6227L, 26196L, 
31057L, 17725L, 20980L, 16256L, 16262L, 18580L, 12499L, 11163L, 
13486L, 10349L, 11161L, 12024L, 1619L, 2010L, 1503L, 1772L, 2987L, 
1731L), Down = c(6157L, 3075L, 2774L, 3197L, 2199L, 2564L, 5702L, 
3750L, 4015L, 3527L, 5204L, 3302L, 3206L, 3767L, 3059L, 3899L, 
2770L, 6792L, 24774L, 28216L, 18406L, 20660L, 15670L, 15362L, 
20526L, 11039L, 11507L, 12231L, 11981L, 11810L, 12161L, 1552L, 
1985L, 1763L, 2402L, 3947L, 1362L)), row.names = c(NA, -37L), class = "data.frame")

Here is the expected output.

         Date  Time    Open    High     Low   Close   Up Down
12 01/14/2022 09:00 4610.00 4614.00 4607.00 4613.25 5139 3302
13 01/14/2022 09:05 4613.25 4614.50 4609.50 4614.00 2908 3206
14 01/14/2022 09:10 4614.00 4619.75 4614.00 4617.50 3943 3767
15 01/14/2022 09:15 4617.75 4621.25 4616.50 4619.00 4140 3059
32 01/18/2022 09:00 4610.50 4612.25 4609.50 4611.50 1619 1552
33 01/18/2022 09:05 4611.50 4612.75 4609.75 4612.00 2010 1985
34 01/18/2022 09:10 4612.00 4612.50 4609.25 4611.50 1503 1763
35 01/18/2022 09:15 4611.75 4612.50 4608.00 4609.50 1772 2402

CodePudding user response：

If there are overlaps, this will print the overlapping rows only once (i.e. all unique rows in the set of all possibly-overlapping groups)

library(dplyr, warn.conflicts = FALSE)

df %>% 
  mutate(cond = (Close >= Open) & lead(Close >= Open) & lead(Close) <= High) %>% 
  slice(unique(c(outer(0:3, which(cond), ' '))))
#>          Date  Time    Open    High     Low   Close    Up  Down  cond
#> 1  01/14/2022 08:20 4617.00 4625.50 4617.00 4622.00  3218  3197  TRUE
#> 2  01/14/2022 08:25 4622.00 4625.00 4620.50 4624.50  2246  2199 FALSE
#> 3  01/14/2022 08:30 4624.75 4625.00 4620.50 4623.75  2817  2564 FALSE
#> 4  01/14/2022 08:35 4623.75 4625.00 4616.75 4620.75  5079  5702 FALSE
#> 5  01/14/2022 09:00 4610.00 4614.00 4607.00 4613.25  5139  3302  TRUE
#> 6  01/14/2022 09:05 4613.25 4614.50 4609.50 4614.00  2908  3206 FALSE
#> 7  01/14/2022 09:10 4614.00 4619.75 4614.00 4617.50  3943  3767  TRUE
#> 8  01/14/2022 09:15 4617.75 4621.25 4616.50 4619.00  4140  3059  TRUE
#> 9  01/14/2022 09:20 4619.00 4623.00 4616.00 4619.75  4026  3899 FALSE
#> 10 01/14/2022 09:25 4619.75 4621.50 4616.25 4619.50  3068  2770 FALSE
#> 11 01/14/2022 09:30 4619.50 4622.50 4617.75 4619.25  6227  6792 FALSE
#> 12 01/14/2022 09:40 4618.25 4638.75 4615.25 4635.00 31057 28216  TRUE
#> 13 01/14/2022 09:45 4634.75 4640.50 4629.25 4635.50 17725 18406 FALSE
#> 14 01/14/2022 09:50 4635.75 4645.75 4633.50 4635.25 20980 20660 FALSE
#> 15 01/14/2022 09:55 4635.25 4644.25 4633.25 4643.50 16256 15670 FALSE
#> 16 01/14/2022 10:35 4638.00 4640.00 4631.50 4639.50 12024 12161  TRUE
#> 17 01/18/2022 09:00 4610.50 4612.25 4609.50 4611.50  1619  1552  TRUE
#> 18 01/18/2022 09:05 4611.50 4612.75 4609.75 4612.00  2010  1985 FALSE
#> 19 01/18/2022 09:10 4612.00 4612.50 4609.25 4611.50  1503  1763 FALSE
#> 20 01/18/2022 09:15 4611.75 4612.50 4608.00 4609.50  1772  2402 FALSE

^{Created on 2022-01-20 by the reprex package (v2.0.1)}

This also works

df %>% 
  mutate(cond = (Close >= Open) & lead(Close >= Open) & lead(Close) <= High) %>% 
  filter(purrr::reduce(1:3, ~ .x | lag(.x), .init = cond))