Assuming I have the following data:
library(tidyquant)
data <- tq_get("^GSPC", from = "2010-01-01", to = "2015-06-01")
Which is a time-series dataset and I want to compute the number of consecutive days the open > close and also when the open <= close.
So I will have something like:
3 consecutive days up = 100
2 consecutive days up = 200
1 consecutive days up = 340
1 consecutive days down = 400
2 consecutive days down = 500
3 consecutive days down = 650
etc.
The calculation will be
# of 3 consecutive up days / # of total days = 100 / 2190 = 0.0457
I would like to do this for both the up and down days...
EDIT:
I think this is a little closer to what I am trying:
First I compute if the open > close then I compute the consecutive days using the rle. I would like to do this for day2, day3 etc.
out <- data %>%
filter(date >= "2015-01-01") %>%
mutate(
openHigherClose = ifelse(open >= close, "up", "down"),
openHigherClose2 = ifelse(open >= close, 1, 0)
)
rle(cumsum(out$openHigherClose2))
Data:
data <- structure(list(symbol = c("^GSPC", "^GSPC", "^GSPC", "^GSPC",
"^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC",
"^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC",
"^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC",
"^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC",
"^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC", "^GSPC",
"^GSPC", "^GSPC"), date = structure(c(16526, 16527, 16531, 16532,
16533, 16534, 16535, 16538, 16539, 16540, 16541, 16542, 16545,
16546, 16547, 16548, 16549, 16552, 16553, 16554, 16555, 16556,
16559, 16560, 16561, 16562, 16563, 16566, 16567, 16568, 16569,
16570, 16573, 16574, 16575, 16576, 16577, 16581, 16582, 16583,
16584), class = "Date"), open = c(2067.629883, 2060.030029, 2064.870117,
2080.790039, 2076.939941, 2081.290039, 2091.51001, 2102.030029,
2092.280029, 2097.820068, 2105.959961, 2102.580078, 2084.110107,
2102.820068, 2098.27002, 2107.209961, 2112.800049, 2119.290039,
2108.350098, 2112.48999, 2105.52002, 2087.379883, 2110.22998,
2112.629883, 2091.26001, 2079.959961, 2092.129883, 2115.560059,
2102.870117, 2099.620117, 2100.429932, 2122.070068, 2121.300049,
2129.449951, 2127.790039, 2125.550049, 2130.360107, 2125.340088,
2105.129883, 2122.27002, 2120.659912), high = c(2067.629883,
2072.169922, 2086.98999, 2089.810059, 2086.689941, 2093.310059,
2102.610107, 2107.649902, 2098.620117, 2111.909912, 2111.300049,
2102.580078, 2103.939941, 2109.639893, 2109.97998, 2120.48999,
2120.919922, 2125.919922, 2116.040039, 2113.649902, 2105.52002,
2108.409912, 2120.949951, 2115.23999, 2098.419922, 2092.899902,
2117.659912, 2117.689941, 2105.060059, 2110.189941, 2121.449951,
2123.889893, 2131.780029, 2133.02002, 2134.719971, 2134.280029,
2132.149902, 2125.340088, 2126.219971, 2122.27002, 2120.659912
), low = c(2048.379883, 2057.320068, 2056.52002, 2076.100098,
2073.300049, 2074.290039, 2091.51001, 2092.330078, 2083.23999,
2097.820068, 2100.02002, 2072.370117, 2084.110107, 2094.379883,
2091.050049, 2103.189941, 2112.800049, 2107.040039, 2094.889893,
2097.409912, 2077.590088, 2087.379883, 2110.22998, 2088.459961,
2067.929932, 2074.98999, 2092.129883, 2104.580078, 2085.570068,
2096.040039, 2100.429932, 2116.810059, 2120.01001, 2124.5, 2122.590088,
2122.949951, 2126.060059, 2099.179932, 2105.129883, 2112.860107,
2104.889893), close = c(2059.689941, 2066.959961, 2080.620117,
2076.330078, 2081.899902, 2091.179932, 2102.060059, 2092.429932,
2095.840088, 2106.629883, 2104.98999, 2081.179932, 2100.399902,
2097.290039, 2107.959961, 2112.929932, 2117.689941, 2108.919922,
2114.76001, 2106.850098, 2085.51001, 2108.290039, 2114.48999,
2089.459961, 2080.149902, 2088, 2116.100098, 2105.330078, 2099.120117,
2098.47998, 2121.100098, 2122.72998, 2129.199951, 2127.830078,
2125.850098, 2130.820068, 2126.060059, 2104.199951, 2123.47998,
2120.790039, 2107.389893), volume = c(3543270000, 3095960000,
3302970000, 3065510000, 3265330000, 3172360000, 3156200000, 2908420000,
3301270000, 4013760000, 3434120000, 3627600000, 3000160000, 3243410000,
3348480000, 3636670000, 3375780000, 3438750000, 3546270000, 4074970000,
4509680000, 3379390000, 3091580000, 3793950000, 3792210000, 3676640000,
3399440000, 2992670000, 3139520000, 3374260000, 3225740000, 3092080000,
2888190000, 3296030000, 3025880000, 3070460000, 2571860000, 3342130000,
3127960000, 2980350000, 3927390000), adjusted = c(2059.689941,
2066.959961, 2080.620117, 2076.330078, 2081.899902, 2091.179932,
2102.060059, 2092.429932, 2095.840088, 2106.629883, 2104.98999,
2081.179932, 2100.399902, 2097.290039, 2107.959961, 2112.929932,
2117.689941, 2108.919922, 2114.76001, 2106.850098, 2085.51001,
2108.290039, 2114.48999, 2089.459961, 2080.149902, 2088, 2116.100098,
2105.330078, 2099.120117, 2098.47998, 2121.100098, 2122.72998,
2129.199951, 2127.830078, 2125.850098, 2130.820068, 2126.060059,
2104.199951, 2123.47998, 2120.790039, 2107.389893)), row.names = c(NA,
-41L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
A bit quick and dirty, just to get the correct counts based on your dput data
library(data.table)
setDT(data)
data[, isdayup := open > close]
counts <- rle(data$isdayup)
data.table(consecutive_days = counts$lengths, isdayup = counts$values)[, .(n = .N), by = .(consecutive_days, isdayup)]
I did not do any formatting, but I am sure you can proceed based on this summary table
consecutive_days isdayup n
1: 1 TRUE 5
2: 2 FALSE 4
3: 3 FALSE 3
4: 2 TRUE 6
5: 1 FALSE 4
6: 3 TRUE 1
