Home > database >  Compute consecutive days up and down
Compute consecutive days up and down

Time:01-29

Assuming I have the following data:

library(tidyquant)
data <- tq_get("^GSPC", from = "2010-01-01", to = "2015-06-01")

Which is a time-series dataset and I want to compute the number of consecutive days the open > close and also when the open <= close.

So I will have something like:

3 consecutive days up = 100

2 consecutive days up = 200

1 consecutive days up = 340

1 consecutive days down = 400

2 consecutive days down = 500

3 consecutive days down = 650

etc.

The calculation will be

# of 3 consecutive up days / # of total days = 100 / 2190 = 0.0457

I would like to do this for both the up and down days...

EDIT:

I think this is a little closer to what I am trying:

First I compute if the open > close then I compute the consecutive days using the rle. I would like to do this for day2, day3 etc.

out <- data %>%
  filter(date >= "2015-01-01") %>% 
  mutate(
    openHigherClose = ifelse(open >= close, "up", "down"),
    openHigherClose2 = ifelse(open >= close, 1, 0)
  )
rle(cumsum(out$openHigherClose2))

Data:

data <- structure(list(symbol = c("^GSPC", "^GSPC", "^GSPC", "^GSPC", 
"^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", 
"^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", 
"^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", 
"^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", 
"^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", 
"^GSPC", "^GSPC"), date = structure(c(16526, 16527, 16531, 16532, 
16533, 16534, 16535, 16538, 16539, 16540, 16541, 16542, 16545, 
16546, 16547, 16548, 16549, 16552, 16553, 16554, 16555, 16556, 
16559, 16560, 16561, 16562, 16563, 16566, 16567, 16568, 16569, 
16570, 16573, 16574, 16575, 16576, 16577, 16581, 16582, 16583, 
16584), class = "Date"), open = c(2067.629883, 2060.030029, 2064.870117, 
2080.790039, 2076.939941, 2081.290039, 2091.51001, 2102.030029, 
2092.280029, 2097.820068, 2105.959961, 2102.580078, 2084.110107, 
2102.820068, 2098.27002, 2107.209961, 2112.800049, 2119.290039, 
2108.350098, 2112.48999, 2105.52002, 2087.379883, 2110.22998, 
2112.629883, 2091.26001, 2079.959961, 2092.129883, 2115.560059, 
2102.870117, 2099.620117, 2100.429932, 2122.070068, 2121.300049, 
2129.449951, 2127.790039, 2125.550049, 2130.360107, 2125.340088, 
2105.129883, 2122.27002, 2120.659912), high = c(2067.629883, 
2072.169922, 2086.98999, 2089.810059, 2086.689941, 2093.310059, 
2102.610107, 2107.649902, 2098.620117, 2111.909912, 2111.300049, 
2102.580078, 2103.939941, 2109.639893, 2109.97998, 2120.48999, 
2120.919922, 2125.919922, 2116.040039, 2113.649902, 2105.52002, 
2108.409912, 2120.949951, 2115.23999, 2098.419922, 2092.899902, 
2117.659912, 2117.689941, 2105.060059, 2110.189941, 2121.449951, 
2123.889893, 2131.780029, 2133.02002, 2134.719971, 2134.280029, 
2132.149902, 2125.340088, 2126.219971, 2122.27002, 2120.659912
), low = c(2048.379883, 2057.320068, 2056.52002, 2076.100098, 
2073.300049, 2074.290039, 2091.51001, 2092.330078, 2083.23999, 
2097.820068, 2100.02002, 2072.370117, 2084.110107, 2094.379883, 
2091.050049, 2103.189941, 2112.800049, 2107.040039, 2094.889893, 
2097.409912, 2077.590088, 2087.379883, 2110.22998, 2088.459961, 
2067.929932, 2074.98999, 2092.129883, 2104.580078, 2085.570068, 
2096.040039, 2100.429932, 2116.810059, 2120.01001, 2124.5, 2122.590088, 
2122.949951, 2126.060059, 2099.179932, 2105.129883, 2112.860107, 
2104.889893), close = c(2059.689941, 2066.959961, 2080.620117, 
2076.330078, 2081.899902, 2091.179932, 2102.060059, 2092.429932, 
2095.840088, 2106.629883, 2104.98999, 2081.179932, 2100.399902, 
2097.290039, 2107.959961, 2112.929932, 2117.689941, 2108.919922, 
2114.76001, 2106.850098, 2085.51001, 2108.290039, 2114.48999, 
2089.459961, 2080.149902, 2088, 2116.100098, 2105.330078, 2099.120117, 
2098.47998, 2121.100098, 2122.72998, 2129.199951, 2127.830078, 
2125.850098, 2130.820068, 2126.060059, 2104.199951, 2123.47998, 
2120.790039, 2107.389893), volume = c(3543270000, 3095960000, 
3302970000, 3065510000, 3265330000, 3172360000, 3156200000, 2908420000, 
3301270000, 4013760000, 3434120000, 3627600000, 3000160000, 3243410000, 
3348480000, 3636670000, 3375780000, 3438750000, 3546270000, 4074970000, 
4509680000, 3379390000, 3091580000, 3793950000, 3792210000, 3676640000, 
3399440000, 2992670000, 3139520000, 3374260000, 3225740000, 3092080000, 
2888190000, 3296030000, 3025880000, 3070460000, 2571860000, 3342130000, 
3127960000, 2980350000, 3927390000), adjusted = c(2059.689941, 
2066.959961, 2080.620117, 2076.330078, 2081.899902, 2091.179932, 
2102.060059, 2092.429932, 2095.840088, 2106.629883, 2104.98999, 
2081.179932, 2100.399902, 2097.290039, 2107.959961, 2112.929932, 
2117.689941, 2108.919922, 2114.76001, 2106.850098, 2085.51001, 
2108.290039, 2114.48999, 2089.459961, 2080.149902, 2088, 2116.100098, 
2105.330078, 2099.120117, 2098.47998, 2121.100098, 2122.72998, 
2129.199951, 2127.830078, 2125.850098, 2130.820068, 2126.060059, 
2104.199951, 2123.47998, 2120.790039, 2107.389893)), row.names = c(NA, 
-41L), class = c("tbl_df", "tbl", "data.frame"))

CodePudding user response:

A bit quick and dirty, just to get the correct counts based on your dput data

library(data.table)
setDT(data)

data[, isdayup := open > close]
counts <- rle(data$isdayup)
data.table(consecutive_days = counts$lengths, isdayup = counts$values)[, .(n = .N), by = .(consecutive_days, isdayup)]

I did not do any formatting, but I am sure you can proceed based on this summary table

   consecutive_days isdayup n
1:                1    TRUE 5
2:                2   FALSE 4
3:                3   FALSE 3
4:                2    TRUE 6
5:                1   FALSE 4
6:                3    TRUE 1
  •  Tags:  
  • Related