I have two countries with different starting and endings years. I have the mean earnings of different social classes. I would like to have the earnings of the first starting year = 100 as an index for each social class. Then i would like to see how they progressed from 100 in subsequent years. This in general should be easy to do, but it seems that since my countries have different numbers of observations, it is not working.
Here is the code that i have tried, but i only got missing values:
df=df %>%
group_by(cntry, year,class_m) %>%
mutate(base_year = (mean[first(year)]/mean)*100)
Here is the data:
df= structure(list(cntry = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("at", "be"), class = "factor"),
year = structure(c(4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L,
16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L,
19L, 19L, 19L, 20L, 20L, 20L, 20L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L,
16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L
), .Label = c("1995", "1997", "2000", "2003", "2004", "2005",
"2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013",
"2014", "2015", "2016", "2017", "2018", "2019"), class = "factor"),
class_m = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("Low-skilled working class",
"Skilled working class", "Middle class", "Upper class"), class = "factor"),
mean = c(21667.3165297756, 31141.2479100646, 38694.5317839067,
48897.5586114381, 21893.6782367936, 29866.0796003899, 36846.1057208349,
46115.8225807015, 19914.101136956, 30201.1848571751, 36688.5006276306,
44334.4349912073, 20505.9102212244, 30071.1070352498, 37093.4347815202,
44630.7476325564, 20265.9465807599, 29827.9369893851, 40549.4855257344,
48107.2865241041, 22378.7756708627, 31334.7756747725, 39981.9785570756,
50347.8600052063, 23101.010596959, 31412.9240693068, 40458.6454333296,
51740.898756006, 19805.2965921531, 30817.6682795387, 41165.6041754244,
52782.5026014194, 19078.5626059941, 30499.5262897878, 41177.4423103889,
51240.6014436097, 20393.1169949116, 29796.8273849528, 39234.5103600113,
50494.5284121857, 20786.560760249, 31306.6058474771, 40854.36428628,
50339.5860855376, 20033.5844477617, 30424.7651611075, 39659.447696875,
49191.4195426966, 18851.261369003, 30412.4669765863, 41857.2930659497,
51097.4975692186, 22333.7894908968, 30863.010648668, 41852.0093099513,
54112.6228115753, 21709.19921875, 30039.5068801246, 41097.4541047158,
49862.44140625, 20113.5586718618, 30733.8952367545, 41658.1716627373,
51754.4018503782, 21818.4311173551, 33225.8409123812, 43882.2512500977,
51037.5228976151, 15858.5028150308, 18782.8272745439, 22871.4020551682,
26288.6154497508, 26599.1650213236, 31720.3186300543, 41940.5016413888,
51187.0060567118, 18564.6510736198, 21526.72898147, 24807.2116933588,
29207.4658820585, 23058.5603825146, 31862.7097532934, 37588.62928007,
45160.9518839946, 25495.8949453907, 31851.1999874662, 38276.6899334939,
46318.331560595, 23165.6350767837, 32586.7829065825, 37256.5740814167,
45285.0662561028, 23975.7581116063, 30787.3910726117, 37346.8507982085,
45180.6091420909, 23786.1529599028, 32413.707905246, 38596.3467614532,
47026.6344280445, 24272.92088131, 31167.7104944988, 37745.6268718255,
46128.4799968946, 24583.9968164343, 29819.2298432657, 40053.8477213667,
48223.1556254353, 23227.04705051, 29611.9190298389, 39086.0012315702,
46742.9511396314, 20980.1647228858, 29627.2417955117, 38648.6829503705,
45677.0658477392, 21397.8125304146, 30675.2233482807, 40735.634479222,
46355.3748374436, 22836.5595055445, 29859.0336509053, 40335.3885497182,
47934.8837121327, 21465.185981748, 30436.1330929852, 40091.5582937488,
48743.3268548605, 21375.6534656544, 31060.2359133816, 40006.7183770635,
47618.8685730448, 20901.803025412, 29971.1886677767, 39526.0725185188,
46793.098588355, 21710.1246251194, 30894.12481284, 39699.3077814615,
47179.3071888513)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -140L), groups = structure(list(
cntry = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("at",
"be"), class = "factor"), year = structure(c(4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L), .Label = c("1995", "1997",
"2000", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor"), .rows = structure(list(
1:4, 5:8, 9:12, 13:16, 17:20, 21:24, 25:28, 29:32, 33:36,
37:40, 41:44, 45:48, 49:52, 53:56, 57:60, 61:64, 65:68,
69:72, 73:76, 77:80, 81:84, 85:88, 89:92, 93:96, 97:100,
101:104, 105:108, 109:112, 113:116, 117:120, 121:124,
125:128, 129:132, 133:136, 137:140), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -35L), .drop = TRUE))
CodePudding user response:
Is this what you want?
df %>%
group_by(cntry, class_m) %>%
mutate(base_year = 100 * (1 - (first(mean) - mean) / mean))
# # A tibble: 140 x 5
# # Groups: cntry, class_m [8]
# cntry year class_m mean base_year
# <fct> <fct> <fct> <dbl> <dbl>
# 1 at 2003 Low-skilled working class 21667. 100
# 2 at 2003 Skilled working class 31141. 100
# 3 at 2003 Middle class 38695. 100
# 4 at 2003 Upper class 48898. 100
# 5 at 2004 Low-skilled working class 21894. 101.
# 6 at 2004 Skilled working class 29866. 95.7
# 7 at 2004 Middle class 36846. 95.0
# 8 at 2004 Upper class 46116. 94.0
# 9 at 2005 Low-skilled working class 19914. 91.2
# 10 at 2005 Skilled working class 30201. 96.9
# # ... with 130 more rows
