Create a column that takes the first value of another column and subsequent values are the scaler of-CodePudding

I am trying to create a new column called g_it in a grouped data frame where the first value for each group will be the initial value in the column called exp and the subsequent values are (1 - 0.1) * lag(g_it) exp.

I believe purrr:accumulate is what I'm looking for but I'm not sure how to set it up.

My data is:

structure(list(group = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 
4, 4, 4, 4), exp = c(0.493735461892577, 0.501836433242221, 0.4916437138759, 
0.515952808021378, 0.503295077718154, 0.49179531615882, 0.504874290524285, 
0.507383247051292, 0.505757813516535, 0.496946116128436, 0.515117811684508, 
0.503898432364114, 0.493787594194582, 0.477853001128225, 0.511249309181431, 
0.499550663909848)), class = "data.frame", row.names = c(NA, 
-16L))

Expected output:

 group   exp           g_it
 1       0.4937355     0.4937355
 1       0.5018364     0.94619835
 1       0.4916437     1.343222215
 1       0.5159528     1.724852794
 2       0.5032951     0.5032951
 2       0.4917953     0.94476089
 2       0.5048743     1.355159101
 2       0.5073832     1.727026391
 3       0.5057578     0.5057578
 3       0.4969461     0.95212812
 3       0.5151178     1.372033108
 3       0.5038984     1.738728197
 4       0.4937876     0.4937876
 4       0.477853      0.92226184
 4       0.5112493     1.341284956
 4       0.4995507     1.70670716

CodePudding user response：

If you provide a function to accumulate with the ~ syntax, .x is the "accumulated" (previous) value and .y is the "next" value.

df <- structure(list(group = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 
4, 4, 4, 4), exp = c(0.493735461892577, 0.501836433242221, 0.4916437138759, 
0.515952808021378, 0.503295077718154, 0.49179531615882, 0.504874290524285, 
0.507383247051292, 0.505757813516535, 0.496946116128436, 0.515117811684508, 
0.503898432364114, 0.493787594194582, 0.477853001128225, 0.511249309181431, 
0.499550663909848)), class = "data.frame", row.names = c(NA, 
-16L))

library(dplyr, warn.conflicts = F)
library(purrr)

df %>% 
  group_by(group) %>%  
  mutate(g_it = accumulate(exp, ~ (1 - 0.1)*.x   .y))
#> # A tibble: 16 × 3
#> # Groups:   group [4]
#>    group   exp  g_it
#>    <dbl> <dbl> <dbl>
#>  1     1 0.494 0.494
#>  2     1 0.502 0.946
#>  3     1 0.492 1.34 
#>  4     1 0.516 1.72 
#>  5     2 0.503 0.503
#>  6     2 0.492 0.945
#>  7     2 0.505 1.36 
#>  8     2 0.507 1.73 
#>  9     3 0.506 0.506
#> 10     3 0.497 0.952
#> 11     3 0.515 1.37 
#> 12     3 0.504 1.74 
#> 13     4 0.494 0.494
#> 14     4 0.478 0.922
#> 15     4 0.511 1.34 
#> 16     4 0.500 1.71

^{Created on 2022-01-10 by the reprex package (v2.0.1)}

CodePudding user response：

A base R option using ave Reduce

transform(
  df,
  g_it = ave(
    exp,
    group,
    FUN = function(v) {
      Reduce(
        function(x, y) 0.9 * x   y,
        v,
        accumulate = TRUE
      )
    }
  )
)

gives

   group       exp      g_it
1      1 0.4937355 0.4937355
2      1 0.5018364 0.9461983
3      1 0.4916437 1.3432222
4      1 0.5159528 1.7248528
5      2 0.5032951 0.5032951
6      2 0.4917953 0.9447609
7      2 0.5048743 1.3551591
8      2 0.5073832 1.7270264
9      3 0.5057578 0.5057578
10     3 0.4969461 0.9521281
11     3 0.5151178 1.3720331
12     3 0.5038984 1.7387283
13     4 0.4937876 0.4937876
14     4 0.4778530 0.9222618
15     4 0.5112493 1.3412850
16     4 0.4995507 1.7067071

CodePudding user response：

Another possible solution, based only on dplyr and cumsum:

library(dplyr)

df %>% 
  group_by(group) %>% 
  mutate(g_it = cumsum((1 - 0.1)^(row_number() - 1) * exp)) %>%  ungroup

#> # A tibble: 16 × 3
#>    group   exp  g_it
#>    <dbl> <dbl> <dbl>
#>  1     1 0.494 0.494
#>  2     1 0.502 0.945
#>  3     1 0.492 1.34 
#>  4     1 0.516 1.72 
#>  5     2 0.503 0.503
#>  6     2 0.492 0.946
#>  7     2 0.505 1.35 
#>  8     2 0.507 1.72 
#>  9     3 0.506 0.506
#> 10     3 0.497 0.953
#> 11     3 0.515 1.37 
#> 12     3 0.504 1.74 
#> 13     4 0.494 0.494
#> 14     4 0.478 0.924
#> 15     4 0.511 1.34 
#> 16     4 0.500 1.70