I am trying to filter my data for certain conditions using dplyr. The conditions need to be applied to rows that occur sequentially. I have a condition that would apply to the first row (not the first row of the df) and then I am interested to see if there if the following row (the second row) meets another set of conditions. If the conditions are met for the 1st and 2nd row then I want to be able to see the 1st, 2nd, 3rd, and 4th row
Here are the conditions that I want to filter for
- The
Closeof Row1 is greater than or equal to theOpenof Row1 - The
Closeof Row2 is greater than or equal to theOpenof Row2 - The
Closeof Row2 is less than or equal to theHighof Row1
Here is an example of my data.
structure(list(Date = c("01/14/2022", "01/14/2022", "01/14/2022",
"01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022",
"01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022",
"01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022",
"01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022",
"01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022", "01/14/2022",
"01/14/2022", "01/14/2022", "01/14/2022", "01/18/2022", "01/18/2022",
"01/18/2022", "01/18/2022", "01/18/2022", "01/18/2022"), Time = c("08:05",
"08:10", "08:15", "08:20", "08:25", "08:30", "08:35", "08:40",
"08:45", "08:50", "08:55", "09:00", "09:05", "09:10", "09:15",
"09:20", "09:25", "09:30", "09:35", "09:40", "09:45", "09:50",
"09:55", "10:00", "10:05", "10:10", "10:15", "10:20", "10:25",
"10:30", "10:35", "09:00", "09:05", "09:10", "09:15", "09:20",
"09:25"), Open = c(4618.75, 4621.25, 4621, 4617, 4622, 4624.75,
4623.75, 4620.75, 4617.25, 4613.75, 4612, 4610, 4613.25, 4614,
4617.75, 4619, 4619.75, 4619.5, 4619.25, 4618.25, 4634.75, 4635.75,
4635.25, 4643.25, 4650.75, 4640.75, 4646, 4641.25, 4654.5, 4639.5,
4638, 4610.5, 4611.5, 4612, 4611.75, 4610, 4605.75), High = c(4621.75,
4623.75, 4623.25, 4625.5, 4625, 4625, 4625, 4621.75, 4620.25,
4617.75, 4612.5, 4614, 4614.5, 4619.75, 4621.25, 4623, 4621.5,
4622.5, 4624.25, 4638.75, 4640.5, 4645.75, 4644.25, 4652.5, 4653.5,
4649.5, 4651.75, 4655.75, 4655, 4642.75, 4640, 4612.25, 4612.75,
4612.5, 4612.5, 4610.5, 4608.75), Low = c(4612.75, 4617.5, 4616,
4617, 4620.5, 4620.5, 4616.75, 4616, 4611.75, 4610.25, 4606.75,
4607, 4609.5, 4614, 4616.5, 4616, 4616.25, 4617.75, 4614.5, 4615.25,
4629.25, 4633.5, 4633.25, 4642.5, 4635.75, 4640.5, 4639.5, 4641,
4638.75, 4633.75, 4631.5, 4609.5, 4609.75, 4609.25, 4608, 4604.5,
4604.75), Close = c(4621.25, 4620.75, 4616.75, 4622, 4624.5,
4623.75, 4620.75, 4617, 4613.5, 4612, 4609.75, 4613.25, 4614,
4617.5, 4619, 4619.75, 4619.5, 4619.25, 4618, 4635, 4635.5, 4635.25,
4643.5, 4651, 4641, 4646.25, 4641.5, 4654.75, 4639.5, 4637.75,
4639.5, 4611.5, 4612, 4611.5, 4609.5, 4605.75, 4608.75), Up = c(6712L,
3316L, 2396L, 3218L, 2246L, 2817L, 5079L, 3495L, 4783L, 4404L,
5390L, 5139L, 2908L, 3943L, 4140L, 4026L, 3068L, 6227L, 26196L,
31057L, 17725L, 20980L, 16256L, 16262L, 18580L, 12499L, 11163L,
13486L, 10349L, 11161L, 12024L, 1619L, 2010L, 1503L, 1772L, 2987L,
1731L), Down = c(6157L, 3075L, 2774L, 3197L, 2199L, 2564L, 5702L,
3750L, 4015L, 3527L, 5204L, 3302L, 3206L, 3767L, 3059L, 3899L,
2770L, 6792L, 24774L, 28216L, 18406L, 20660L, 15670L, 15362L,
20526L, 11039L, 11507L, 12231L, 11981L, 11810L, 12161L, 1552L,
1985L, 1763L, 2402L, 3947L, 1362L)), row.names = c(NA, -37L), class = "data.frame")
Here is the expected output.
Date Time Open High Low Close Up Down
12 01/14/2022 09:00 4610.00 4614.00 4607.00 4613.25 5139 3302
13 01/14/2022 09:05 4613.25 4614.50 4609.50 4614.00 2908 3206
14 01/14/2022 09:10 4614.00 4619.75 4614.00 4617.50 3943 3767
15 01/14/2022 09:15 4617.75 4621.25 4616.50 4619.00 4140 3059
32 01/18/2022 09:00 4610.50 4612.25 4609.50 4611.50 1619 1552
33 01/18/2022 09:05 4611.50 4612.75 4609.75 4612.00 2010 1985
34 01/18/2022 09:10 4612.00 4612.50 4609.25 4611.50 1503 1763
35 01/18/2022 09:15 4611.75 4612.50 4608.00 4609.50 1772 2402
CodePudding user response:
If there are overlaps, this will print the overlapping rows only once (i.e. all unique rows in the set of all possibly-overlapping groups)
library(dplyr, warn.conflicts = FALSE)
df %>%
mutate(cond = (Close >= Open) & lead(Close >= Open) & lead(Close) <= High) %>%
slice(unique(c(outer(0:3, which(cond), ' '))))
#> Date Time Open High Low Close Up Down cond
#> 1 01/14/2022 08:20 4617.00 4625.50 4617.00 4622.00 3218 3197 TRUE
#> 2 01/14/2022 08:25 4622.00 4625.00 4620.50 4624.50 2246 2199 FALSE
#> 3 01/14/2022 08:30 4624.75 4625.00 4620.50 4623.75 2817 2564 FALSE
#> 4 01/14/2022 08:35 4623.75 4625.00 4616.75 4620.75 5079 5702 FALSE
#> 5 01/14/2022 09:00 4610.00 4614.00 4607.00 4613.25 5139 3302 TRUE
#> 6 01/14/2022 09:05 4613.25 4614.50 4609.50 4614.00 2908 3206 FALSE
#> 7 01/14/2022 09:10 4614.00 4619.75 4614.00 4617.50 3943 3767 TRUE
#> 8 01/14/2022 09:15 4617.75 4621.25 4616.50 4619.00 4140 3059 TRUE
#> 9 01/14/2022 09:20 4619.00 4623.00 4616.00 4619.75 4026 3899 FALSE
#> 10 01/14/2022 09:25 4619.75 4621.50 4616.25 4619.50 3068 2770 FALSE
#> 11 01/14/2022 09:30 4619.50 4622.50 4617.75 4619.25 6227 6792 FALSE
#> 12 01/14/2022 09:40 4618.25 4638.75 4615.25 4635.00 31057 28216 TRUE
#> 13 01/14/2022 09:45 4634.75 4640.50 4629.25 4635.50 17725 18406 FALSE
#> 14 01/14/2022 09:50 4635.75 4645.75 4633.50 4635.25 20980 20660 FALSE
#> 15 01/14/2022 09:55 4635.25 4644.25 4633.25 4643.50 16256 15670 FALSE
#> 16 01/14/2022 10:35 4638.00 4640.00 4631.50 4639.50 12024 12161 TRUE
#> 17 01/18/2022 09:00 4610.50 4612.25 4609.50 4611.50 1619 1552 TRUE
#> 18 01/18/2022 09:05 4611.50 4612.75 4609.75 4612.00 2010 1985 FALSE
#> 19 01/18/2022 09:10 4612.00 4612.50 4609.25 4611.50 1503 1763 FALSE
#> 20 01/18/2022 09:15 4611.75 4612.50 4608.00 4609.50 1772 2402 FALSE
Created on 2022-01-20 by the reprex package (v2.0.1)
This also works
df %>%
mutate(cond = (Close >= Open) & lead(Close >= Open) & lead(Close) <= High) %>%
filter(purrr::reduce(1:3, ~ .x | lag(.x), .init = cond))
