This question is very similar to this one: "Make coefficient for all dates/categories". The differences are a few changes in the return_coef function. As you will see, I can generate the coefficient for each day/category individually, but when I try to compute them for all rows at once, I get the following error:
Error in UseMethod("select") :
no applicable method for 'select' applied to an object of class "character"
Executable code below:
library(dplyr)
library(tidyverse)
library(lubridate)
# Sample data: four observations of category "ABC", all sharing the same
# date1, with one DR1 value and eight DRM0x series per row.
df1 <- data.frame(
  date1 = rep("2021-06-26", 4),
  date2 = c("2021-06-27", "2021-07-01", "2021-07-02", "2021-07-03"),
  Category = rep("ABC", 4),
  Week = c("Saturday", "Wednesday", "Thurday", "Saturday"),
  DR1 = c(5, 4, 1, 1),
  DRM01 = c(8, 4, 1, 0),
  DRM02 = c(7, 4, 2, 0),
  DRM03 = c(6, 9, 5, 0),
  DRM04 = c(5, 5, 4, 0),
  DRM05 = c(5, 5, 4, 0),
  DRM06 = c(7, 5, 4, 0),
  DRM07 = c(2, 5, 4, 0),
  DRM08 = c(2, 5, 4, 0),
  stringsAsFactors = FALSE
)
#' Estimate the coefficient for one date/category pair
#'
#' Shifts every `DRM0*` series by the median of `DR1 - DRM0*` within its
#' `Category`/`Week` group, reshapes the shifted values of the requested row
#' to long form and fits `numbers ~ b1 * days^2 + b2` with `nls()`.
#'
#' @param df1 Data frame with the columns `date1`, `date2`, `Category`,
#'   `Week`, `DR1` and the `DRM0*` series.
#' @param dmda Date to evaluate; compared with `df1$date2` and parsed with
#'   `ymd()` / `as.Date()`.
#' @param CategoryChosse Category to evaluate; compared with `df1$Category`.
#' @param var1,var2 Names given to the day-index and value columns of the
#'   long table (coerced to character when used as column names).
#' @param gnum,graf Not used by the function body; kept so that existing
#'   calls keep working.
#' @return The fitted `b2` as a numeric value when more than two rows remain
#'   after slicing; otherwise the mean `DR1` of the `Category`/`Week` group
#'   that `dmda` belongs to.
return_coef <- function(df1, dmda, CategoryChosse, var1, var2,
                        gnum = 0, graf = 1) {
  # Deviation of each DRM0* series from DR1, stored as <series>_PV.
  x <- df1 %>% select(starts_with("DRM0"))
  x <- cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
  PV <- select(x, date2, Week, Category, DR1, ends_with("PV"))

  # Median deviation per Category/Week.
  med <- PV %>%
    group_by(Category, Week) %>%
    dplyr::summarize(
      dplyr::across(ends_with("PV"), median),
      .groups = "drop"
    )

  # Add the group median back onto each raw series. The anchored regex keeps
  # across() from also matching the joined *_PV columns.
  SPV <- df1 %>%
    inner_join(med, by = c("Category", "Week")) %>%
    mutate(across(
      matches("^DRM0\\d+$"),
      ~ .x + get(paste0(cur_column(), "_PV")),
      .names = "{col}_{col}_PV"
    )) %>%
    select(date1:Category, DRM01_DRM01_PV:last_col())
  SPV <- data.frame(SPV)

  # Names of the trailing DRM0* series that are zero for the requested row
  # (the reversed cumulative sum is still 0 for them).
  mat1 <- df1 %>%
    dplyr::filter(date2 == dmda, Category == CategoryChosse) %>%
    select(starts_with("DRM0")) %>%
    pivot_longer(cols = everything()) %>%
    arrange(desc(row_number())) %>%
    mutate(cs = cumsum(value)) %>%
    dplyr::filter(cs == 0) %>%
    pull(name)

  dropnames <- paste0(mat1, "_", mat1, "_PV")

  SPV <- SPV %>%
    dplyr::filter(date2 == dmda, Category == CategoryChosse) %>%
    select(-any_of(dropnames))

  # If every series was dropped, re-add NA placeholders so the reshaping
  # below still finds DRM0* columns.
  if (length(grep("DRM0", names(SPV))) == 0) {
    SPV[head(mat1, 10)] <- NA_real_
  }

  # Long table: one row per remaining series, indexed by its number.
  datas <- SPV %>%
    dplyr::filter(date2 == ymd(dmda)) %>%
    group_by(Category) %>%
    dplyr::summarize(dplyr::across(starts_with("DRM0"), sum)) %>%
    pivot_longer(
      cols = -Category,
      names_pattern = "DRM0(.+)",
      values_to = "val"
    ) %>%
    mutate(name = readr::parse_number(name))
  colnames(datas)[-1] <- c(var1, var2)

  # Column names are always character, so look the columns up by string.
  datas$days <- datas[[as.character(var1)]]
  datas$numbers <- datas[[as.character(var2)]]

  # Keep the rows from (days between date1 and dmda) + 1 up to max(days) + 1.
  # `:` binds tighter than `+`, so the whole range is shifted by one; the
  # parentheses only make that explicit.
  datas <- datas %>%
    group_by(Category) %>%
    slice(
      ((as.Date(dmda) - min(as.Date(df1$date1)[
        df1$Category == first(Category)
      ])):max(days)) + 1
    ) %>%
    ungroup()

  # Fallback value: mean DR1 of the Category/Week group that dmda falls in.
  m <- df1 %>%
    group_by(Category, Week) %>%
    dplyr::summarize(
      dplyr::across(starts_with("DR1"), mean),
      .groups = "drop"
    )
  m <- subset(
    m,
    Week == df1$Week[match(ymd(dmda), ymd(df1$date2))] &
      Category == CategoryChosse
  )$DR1

  if (nrow(datas) <= 2) {
    # Too few points for a two-parameter fit.
    val <- as.numeric(m)
  } else {
    mod <- nls(
      numbers ~ b1 * days^2 + b2,
      start = list(b1 = 0, b2 = 0),
      data = datas,
      algorithm = "port"
    )
    val <- as.numeric(coef(mod)[2])
  }

  val
}
# NOTE(review): mapply() fills return_coef()'s parameters positionally, so
# df1$date2 is bound to `df1` and df1$Category to `dmda`; the first
# select() inside the function then receives a character value, not a data frame.
All<-cbind(df1 %>% select(date2, Category), coef = mapply(return_coef, df1$date2, df1$Category))
Error in UseMethod("select") :
no applicable method for 'select' applied to an object of class "character"
If I want to know the coefficient for each one separately, I can do it.
return_coef(df1, "2021-06-27","ABC", var1=0,var2=1)
[1] 6.539702
return_coef(df1, "2021-07-01","ABC", var1=0,var2=1)
[1] 4
return_coef(df1, "2021-07-02","ABC", var1=0,var2=1)
[1] 1
return_coef(df1, "2021-07-03","ABC", var1=0,var2=1)
[1] 3
CodePudding user response:
Two problems:
The first argument of your return_coef function is a data.frame named df1, yet you are calling it with df1$date2 (a character vector). I think you should instead start with mapply(return_coef, list(df1), df1$date2, df1$Category) (though this still errors for now; see the next point).
The list(df1) in this case means that the whole df1 will be passed as the first argument for each of the pairs from df1$date2 and df1$Category.
It now fails with: argument "var1" is missing, with no default. I suspect you were working towards that anyway, so I'll choose a couple of arbitrary names for var1 and var2, and with those the call runs and returns the coefficients.
Ultimately, the function is fine as-is, just change your mapply use as:
# list(df1) has length 1, so mapply() recycles the whole data frame as the
# first argument for every (date2, Category) pair.
mapply(return_coef, list(df1), df1$date2, df1$Category, var1 = "a1", var2 = "a2")
# [1] 6.539702 4.000000 1.000000 3.000000
Because both var1 and var2 are length-1, they are recycled for all calls to return_coef (as their named arguments).
Since you're using dplyr, this can be neatly put into a pipe a little more directly than using cbind(...):
library(dplyr)
# Compute the coefficient for every row and keep only date2, Category and coef.
# list(cur_data()) wraps the piped data frame in a length-1 list so that
# mapply() passes it as return_coef()'s first argument on every call.
# NOTE(review): cur_data() is deprecated in newer dplyr releases in favour of
# pick(); confirm against the installed version.
df1 %>%
transmute(
date2, Category,
coef = mapply(return_coef, list(cur_data()), date2, Category, var1 = "a1", var2 = "a2")
)
# date2 Category coef
# 1 2021-06-27 ABC 6.539702
# 2 2021-07-01 ABC 4.000000
# 3 2021-07-02 ABC 1.000000
# 4 2021-07-03 ABC 3.000000
I use transmute instead of a preceding select(date2, Category) because the function needs variables present in the whole frame. I could easily have done mutate(coef=..) %>% select(date2, Category, coef) as well.
