I have a huge database monthly actualizated, here i'll try to create an exemple
| Year | Month | UF | Seg | Fai | Value | LinProd | Brand |
|---|---|---|---|---|---|---|---|
| 2021 | Nov | SP | Construction | B | 1 | Prod1 | Comp.1 |
| 2021 | Nov | SP | Construction | B | 7 | Prod1 | Market |
| 2021 | Nov | SP | Construction | C | 5 | Prod2 | Market |
| 2021 | Nov | SP | Construction | B | 2 | Prod1 | Comp.2 |
| 2021 | Nov | MG | Location | A | 4 | Prod2 | Market |
| 2021 | Nov | MG | Mineration | A | 8 | Prod4 | Market |
| 2021 | Nov | MG | Mineration | B | 16 | Prod4 | Market |
| 2021 | Nov | MG | Mineration | F | 2 | Prod4 | Market |
| 2021 | Nov | MG | Mineration | A | 3 | Prod4 | Comp.1 |
| 2021 | Nov | MG | Mineration | B | 8 | Prod4 | Comp.2 |
| 2021 | Nov | MG | Mineration | F | 1 | Prod4 | Comp.2 |
This database is exported from an excel, what I need to do is basically, a subtraction, Market-(Comp.1 Comp2). Respecting all the columns. The output I need is something like:
| Year | Month | UF | Seg | Fai | Value | LinProd | Brand |
|---|---|---|---|---|---|---|---|
| 2021 | Nov | SP | Construction | B | 4 | Prod1 | Market |
| 2021 | Nov | SP | Construction | C | 5 | Prod2 | Market |
| 2021 | Nov | MG | Location | A | 4 | Prod2 | Market |
| 2021 | Nov | MG | Mineration | A | 5 | Prod4 | Market |
| 2021 | Nov | MG | Mineration | B | 8 | Prod4 | Market |
| 2021 | Nov | MG | Mineration | F | 1 | Prod4 | Market |
Where there is no subtraction, I just to be printed as it was before, as you can see in the line 5 from the fist table and in the line 3 from the second, there was no product from Comp.1 and Comp.2, so nothing happened to it
I tried this code:
t<-as.data.frame(read_xlsx("C:/Users/lucas/Documents/Base.xlsx"))
c=as.data.frame(filter(t,Brand=="Comp.1"))
c$Value=c$Value*-1
ti=as.data.frame(filter(t,Brand=="Market"))
n=as.data.frame(filter(t,Brand=="Comp.2"))
n$Value=n$Value*-1
mar=rbind(ti,c,n)
ag=aggregate(mar$Value,by=list(mar$Brand,mar$Month,mar$UF, mar$Seg,mar$Fai,mar$LinProd),FUN = sum)
write_xlsx(ag,"C:/Users/lucas/Documents/Test.xlsx" )
CodePudding user response:
An alternative dplyr solution, plus base:
dplyr
library(dplyr)
dat %>%
group_by(Year, Month, UF, Seg, Fai) %>%
mutate(Value = Value - sum(Value[ Brand %in% c("Comp.1", "Comp.2") ])) %>%
ungroup() %>%
filter(!Brand %in% c("Comp.1", "Comp.2"))
# # A tibble: 6 x 8
# Year Month UF Seg Fai Value LinProd Brand
# <int> <chr> <chr> <chr> <chr> <int> <chr> <chr>
# 1 2021 Nov SP Construction B 4 Prod1 Market
# 2 2021 Nov SP Construction C 5 Prod2 Market
# 3 2021 Nov MG Location A 4 Prod2 Market
# 4 2021 Nov MG Mineration A 5 Prod4 Market
# 5 2021 Nov MG Mineration B 8 Prod4 Market
# 6 2021 Nov MG Mineration F 1 Prod4 Market
base R
dat$Value <-
ave(dat$Value * ifelse(dat$Brand %in% c("Comp.1", "Comp.2"), -1, 1), dat[,c("Year", "Month", "UF", "Seg", "Fai")],
FUN = function(z) z sum(z[z < 0]))
dat[!dat$Brand %in% c("Comp.1", "Comp.2"),]
# Year Month UF Seg Fai Value LinProd Brand Value2
# 2 2021 Nov SP Construction B 4 Prod1 Market 4
# 3 2021 Nov SP Construction C 5 Prod2 Market 5
# 5 2021 Nov MG Location A 4 Prod2 Market 4
# 6 2021 Nov MG Mineration A 5 Prod4 Market 5
# 7 2021 Nov MG Mineration B 8 Prod4 Market 8
# 8 2021 Nov MG Mineration F 1 Prod4 Market 1
data.table
library(data.table)
# should instead use `setDT(dat)` if going this route
as.data.table(dat
)[, Value2 := Value - sum(Value[Brand %in% c("Comp.1", "Comp.2")]), by = .(Year, Month, UF, Seg, Fai)
][ !Brand %in% c("Comp.1", "Comp.2") ]
# Year Month UF Seg Fai Value LinProd Brand Value2
# 1: 2021 Nov SP Construction B 7 Prod1 Market 4
# 2: 2021 Nov SP Construction C 5 Prod2 Market 5
# 3: 2021 Nov MG Location A 4 Prod2 Market 4
# 4: 2021 Nov MG Mineration A 8 Prod4 Market 5
# 5: 2021 Nov MG Mineration B 16 Prod4 Market 8
# 6: 2021 Nov MG Mineration F 2 Prod4 Market 1
Data
dat <- structure(list(Year = c(2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L), Month = c("Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov"), UF = c("SP", "SP", "SP", "SP", "MG", "MG", "MG", "MG", "MG", "MG", "MG"), Seg = c("Construction", "Construction", "Construction", "Construction", "Location", "Mineration", "Mineration", "Mineration", "Mineration", "Mineration", "Mineration"), Fai = c("B", "B", "C", "B", "A", "A", "B", "F", "A", "B", "F"), Value = c(1L, 7L, 5L, 2L, 4L, 8L, 16L, 2L, 3L, 8L, 1L), LinProd = c("Prod1", "Prod1", "Prod2", "Prod1", "Prod2", "Prod4", "Prod4", "Prod4", "Prod4", "Prod4", "Prod4"), Brand = c("Comp.1", "Market", "Market", "Comp.2", "Market", "Market", "Market", "Market", "Comp.1", "Comp.2", "Comp.2"), Value2 = c(-4, 4, 5, -5, 4, 5, 8, 1, -6, -16, -2)), row.names = c(NA, -11L), class = "data.frame")
CodePudding user response:
Using dplyr:
library(dplyr)
df %>%
mutate(Value2 = ifelse(Brand == "Comp.1" | Brand == "Comp.2", Value*-1, Value)) %>%
group_by(Year, Month, UF, Seg, Fai, LinProd) %>%
summarize(Value_new = sum(Value2)) %>%
arrange(Seg, LinProd)
# A tibble: 6 x 7
# Groups: Year, Month, UF, Seg, Fai [6]
Year Month UF Seg Fai LinProd Value_new
<int> <chr> <chr> <chr> <chr> <chr> <dbl>
1 2021 Nov SP Construction B Prod1 4
2 2021 Nov SP Construction C Prod2 5
3 2021 Nov MG Location A Prod2 4
4 2021 Nov MG Mineration A Prod4 5
5 2021 Nov MG Mineration B Prod4 8
6 2021 Nov MG Mineration F Prod4 1
Or base R:
df$Value2 <- ifelse(df$Brand == "Comp.1" | df$Brand == "Comp.2", df$Value*-1, df$Value)
df_sum <- aggregate(Value2 ~ Year Month UF Seg Fai LinProd, data = df, FUN = sum)
df_sum[order(df_sum$Seg, df_sum$LinProd),]
Year Month UF Seg Fai LinProd Value2
1 2021 Nov SP Construction B Prod1 4
3 2021 Nov SP Construction C Prod2 5
2 2021 Nov MG Location A Prod2 4
4 2021 Nov MG Mineration A Prod4 5
5 2021 Nov MG Mineration B Prod4 8
6 2021 Nov MG Mineration F Prod4 1
Data:
> dput(df)
structure(list(Year = c(2021L, 2021L, 2021L, 2021L, 2021L, 2021L,
2021L, 2021L, 2021L, 2021L, 2021L), Month = c("Nov", "Nov", "Nov",
"Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov"), UF = c("SP",
"SP", "SP", "SP", "MG", "MG", "MG", "MG", "MG", "MG", "MG"),
Seg = c("Construction", "Construction", "Construction", "Construction",
"Location", "Mineration", "Mineration", "Mineration", "Mineration",
"Mineration", "Mineration"), Fai = c("B", "B", "C", "B",
"A", "A", "B", "F", "A", "B", "F"), Value = c(1L, 7L, 5L,
2L, 4L, 8L, 16L, 2L, 3L, 8L, 1L), LinProd = c("Prod1", "Prod1",
"Prod2", "Prod1", "Prod2", "Prod4", "Prod4", "Prod4", "Prod4",
"Prod4", "Prod4"), Brand = c("Comp.1", "Market", "Market",
"Comp.2", "Market", "Market", "Market", "Market", "Comp.1",
"Comp.2", "Comp.2")), class = "data.frame", row.names = c(NA,
-11L))
