How to get the difference between rows based on a first observation?-CodePudding

I have two countries with different starting and endings years. I have the mean earnings of different social classes. I would like to have the earnings of the first starting year = 100 as an index for each social class. Then i would like to see how they progressed from 100 in subsequent years. This in general should be easy to do, but it seems that since my countries have different numbers of observations, it is not working.

Here is the code that i have tried, but i only got missing values:

df=df %>%
  group_by(cntry, year,class_m) %>%
  mutate(base_year = (mean[first(year)]/mean)*100)

Here is the data:

df= structure(list(cntry = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L), .Label = c("at", "be"), class = "factor"), 
    year = structure(c(4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 
    6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 
    10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 
    13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 
    16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 
    19L, 19L, 19L, 20L, 20L, 20L, 20L, 1L, 1L, 1L, 1L, 2L, 2L, 
    2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 
    6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 
    10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 
    13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 
    16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L
    ), .Label = c("1995", "1997", "2000", "2003", "2004", "2005", 
    "2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", 
    "2014", "2015", "2016", "2017", "2018", "2019"), class = "factor"), 
    class_m = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
    2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 
    1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 
    4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
    3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
    2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 
    1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 
    4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
    3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
    2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("Low-skilled working class", 
    "Skilled working class", "Middle class", "Upper class"), class = "factor"), 
    mean = c(21667.3165297756, 31141.2479100646, 38694.5317839067, 
    48897.5586114381, 21893.6782367936, 29866.0796003899, 36846.1057208349, 
    46115.8225807015, 19914.101136956, 30201.1848571751, 36688.5006276306, 
    44334.4349912073, 20505.9102212244, 30071.1070352498, 37093.4347815202, 
    44630.7476325564, 20265.9465807599, 29827.9369893851, 40549.4855257344, 
    48107.2865241041, 22378.7756708627, 31334.7756747725, 39981.9785570756, 
    50347.8600052063, 23101.010596959, 31412.9240693068, 40458.6454333296, 
    51740.898756006, 19805.2965921531, 30817.6682795387, 41165.6041754244, 
    52782.5026014194, 19078.5626059941, 30499.5262897878, 41177.4423103889, 
    51240.6014436097, 20393.1169949116, 29796.8273849528, 39234.5103600113, 
    50494.5284121857, 20786.560760249, 31306.6058474771, 40854.36428628, 
    50339.5860855376, 20033.5844477617, 30424.7651611075, 39659.447696875, 
    49191.4195426966, 18851.261369003, 30412.4669765863, 41857.2930659497, 
    51097.4975692186, 22333.7894908968, 30863.010648668, 41852.0093099513, 
    54112.6228115753, 21709.19921875, 30039.5068801246, 41097.4541047158, 
    49862.44140625, 20113.5586718618, 30733.8952367545, 41658.1716627373, 
    51754.4018503782, 21818.4311173551, 33225.8409123812, 43882.2512500977, 
    51037.5228976151, 15858.5028150308, 18782.8272745439, 22871.4020551682, 
    26288.6154497508, 26599.1650213236, 31720.3186300543, 41940.5016413888, 
    51187.0060567118, 18564.6510736198, 21526.72898147, 24807.2116933588, 
    29207.4658820585, 23058.5603825146, 31862.7097532934, 37588.62928007, 
    45160.9518839946, 25495.8949453907, 31851.1999874662, 38276.6899334939, 
    46318.331560595, 23165.6350767837, 32586.7829065825, 37256.5740814167, 
    45285.0662561028, 23975.7581116063, 30787.3910726117, 37346.8507982085, 
    45180.6091420909, 23786.1529599028, 32413.707905246, 38596.3467614532, 
    47026.6344280445, 24272.92088131, 31167.7104944988, 37745.6268718255, 
    46128.4799968946, 24583.9968164343, 29819.2298432657, 40053.8477213667, 
    48223.1556254353, 23227.04705051, 29611.9190298389, 39086.0012315702, 
    46742.9511396314, 20980.1647228858, 29627.2417955117, 38648.6829503705, 
    45677.0658477392, 21397.8125304146, 30675.2233482807, 40735.634479222, 
    46355.3748374436, 22836.5595055445, 29859.0336509053, 40335.3885497182, 
    47934.8837121327, 21465.185981748, 30436.1330929852, 40091.5582937488, 
    48743.3268548605, 21375.6534656544, 31060.2359133816, 40006.7183770635, 
    47618.8685730448, 20901.803025412, 29971.1886677767, 39526.0725185188, 
    46793.098588355, 21710.1246251194, 30894.12481284, 39699.3077814615, 
    47179.3071888513)), class = c("grouped_df", "tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -140L), groups = structure(list(
    cntry = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("at", 
    "be"), class = "factor"), year = structure(c(4L, 5L, 6L, 
    7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
    19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
    13L, 14L, 15L, 16L, 17L, 18L), .Label = c("1995", "1997", 
    "2000", "2003", "2004", "2005", "2006", "2007", "2008", "2009", 
    "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", 
    "2018", "2019"), class = "factor"), .rows = structure(list(
        1:4, 5:8, 9:12, 13:16, 17:20, 21:24, 25:28, 29:32, 33:36, 
        37:40, 41:44, 45:48, 49:52, 53:56, 57:60, 61:64, 65:68, 
        69:72, 73:76, 77:80, 81:84, 85:88, 89:92, 93:96, 97:100, 
        101:104, 105:108, 109:112, 113:116, 117:120, 121:124, 
        125:128, 129:132, 133:136, 137:140), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -35L), .drop = TRUE))

CodePudding user response：

Is this what you want?

df %>%
  group_by(cntry, class_m) %>%
  mutate(base_year = 100 * (1 - (first(mean) - mean) / mean))


# # A tibble: 140 x 5
# # Groups:   cntry, class_m [8]
#    cntry year  class_m                     mean base_year
#    <fct> <fct> <fct>                      <dbl>     <dbl>
#  1 at    2003  Low-skilled working class 21667.     100  
#  2 at    2003  Skilled working class     31141.     100  
#  3 at    2003  Middle class              38695.     100  
#  4 at    2003  Upper class               48898.     100  
#  5 at    2004  Low-skilled working class 21894.     101. 
#  6 at    2004  Skilled working class     29866.      95.7
#  7 at    2004  Middle class              36846.      95.0
#  8 at    2004  Upper class               46116.      94.0
#  9 at    2005  Low-skilled working class 19914.      91.2
# 10 at    2005  Skilled working class     30201.      96.9
# # ... with 130 more rows