Dataframe df is shown:
ID gene1 gene2
4602 TET2 TET2
4602 TP53 TP53
4602 TET2 TET2
5095 ASXL1 ASXL1
5095 DNMT3A DNMT3A
5095 NPM1 <NA>
I have been trying to obtain a matching barplot displaying a count (bars) for both the column gene1 and gene2. the gene1 method is the standard, while gene2 is another mutation detector that should be compared to gene1. As you see, in sample 5095 only 2 mutations were detected, while the 3rd was not reproduced.
How do i make a barplot with two bars for each ID showing the count in gene1 and gene2?
here dput()
structure(list(ID = c(4602, 4602, 4602, 5095, 5095, 5095, 5095,
4649, 4649, 4649, 5069, 5069, 5069, 5146, 5132, 5132, 5132, 5132,
5132, 5132, 4297, 4297, 4297, 4297, 4297, 4345, 4345, 4345, 4345,
4345, 4356, 4356, 4356, 4356, 4385, 4385, 4385, 4385, 4385, 4385,
4437, 4437, 4437, 4437, 4437, 4437, 4442, 4442, 4442, 4442, 4442,
4479, 4479, 4479, 4479, 4479, 4479, 4479, 4479, 4479, 4479, 4479,
4479, 4479, 4479, 4479, 4487, 4487, 4487, 4487, 4487, 4487, 4537,
4537, 4537, 4537, 4537, 4537, 4621, 4621, 4621, 4621, 4621, 4621,
4621, 4624, 4624, 4624, 4624, 4624, 4665, 4736, 4736, 4736, 4736,
4736, 4895, 4895, 4895, 4895, 4895, 4903, 4903, 4903, 4903, 4691,
4691, 4691, 4691, 4261, 4261, 4261, 4261, 4394, 4394, 4394, 4394,
4424, 4424, 4424, 4424, 4943, 4943, 4943, 5073, 5169, 5169),
gene1 = c("TET2", "TP53", "TET2", "ASXL1", "DNMT3A", "NPM1",
"PTPN11", "TP53", "TP53", "TET2", "DNMT3A", "TET2", "TET2",
"negative", "JAK2", "ASXL1", "BRAF", "CBL", "TET2", "TET2",
"DNMT3A", "IDH1", "NPM1", "CREBBP", "FLT3", "DNMT3A", "FLT3",
"NPM1", "BCOR", "KIT", "DNMT3A", "IDH1", "NRAS", "BCOR",
"KRAS", "NPM1", "PTPN11", "ETV6", "PHF6", "TET2", "DNMT3A",
"KRAS", "NPM1", "WT1", "TET2", "WT1", "DNMT3A", "FLT3", "NPM1",
"NRAS", "WT1", "DNMT3A", "IDH2", "NPM1", "SRSF2", "ATRX",
"CUX1", "CUX1", "FLT3", "GNAS", "PHF6", "PIGA", "PIGA", "PRPF40B",
"PTPN11", "TET2", "IDH1", "IDH2", "RUNX1", "U2AF1", "TET2",
"TP53", "DNMT3A", "IDH2", "ATRX", "GATA2", "STAG2", "TP53",
"IDH2", "SRSF2", "ASXL1", "GATA1", "KDM6A", "STAG2", "TP53",
"IDH2", "JAK2", "SRSF2", "ASXL1", "RIT1", "KRAS", "NPM1",
"NRAS", "NRAS", "BCOR", "MYD88", "FLT3", "NPM1", "NRAS",
"TET2", "TET2", "DNMT3A", "IDH1", "NPM1", "CREBBP", "DNMT3A",
"IDH1", "IDH2", "NPM1", "FLT3", "FLT3", "GATA2", "SH2B3",
"FLT3", "NPM1", "KDM6A", "SMC1A", "IDH2", "SRSF2", "ASXL2",
"RUNX1", "IDH2", "JAK2", "NPM1", "JAK2", "SRSF2", "STAG2"
), gene2 = c("TET2", "TP53", "TET2", "ASXL1", "DNMT3A", NA,
"PTPN11", "TP53", "TP53", "TET2", "DNMT3A", NA, "TET2", "PTEN",
NA, NA, "BRAF", "CBL", "TET2", "TET2", "JAK2", "SRSF2", NA,
"DNMT3A", "IDH1", "NPM1", NA, "FLT3", "DNMT3A", "FLT3", "NPM1",
NA, NA, "DNMT3A", "IDH1", "NRAS", "BCOR", "KRAS", "NPM1",
"PTPN11", "ETV6", "PHF6", "TET2", "DNMT3A", "KRAS", "NPM1",
NA, "TET2", NA, "DNMT3A", "FLT3", "NPM1", "NRAS", NA, NA,
"IDH2", "NPM1", "SRSF2", NA, "CALR", NA, NA, NA, NA, NA,
NA, NA, NA, NA, "IDH1", "IDH2", "RUNX1", "U2AF1", "TET2",
NA, "DNMT3A", "IDH2", NA, NA, NA, NA, "IDH2", "SRSF2", "ASXL1",
NA, NA, "KMT2D", "TP53", "IDH2", "JAK2", "SRSF2", "ASXL1",
NA, "KRAS", "NPM1", "NRAS", "NRAS", NA, NA, "FLT3", "NPM1",
"NRAS", "TET2", "TET2", "DNMT3A", "IDH1", "NPM1", "CREBBP",
"DNMT3A", "IDH1", "IDH2", "NPM1", "FLT3", "FLT3", NA, NA,
"FLT3", "NPM1", NA, "SMC1A", "IDH2", "SRSF2", NA, "RUNX1",
"IDH2", "JAK2", "NPM1")), class = "data.frame", row.names = c(NA,
-127L))
CodePudding user response:
One more:
df %>%
group_by(ID) %>%
add_count(gene1, gene2) %>%
pivot_longer(
cols = contains("gene")
) %>%
ungroup() %>%
ggplot(aes(factor(name), n, fill=value, group=value, label=value))
geom_col()
facet_wrap(.~ID, scales = "free_y")
geom_text(size = 3, position = position_stack(vjust = 0.5))
theme_classic()
xlab("")
guides(fill=FALSE)
Last try, now it should work. Now we count before pivoting:
df %>%
group_by(ID) %>%
add_count(gene1, gene2) %>%
pivot_longer(
cols = contains("gene")
) %>%
ungroup() %>%
ggplot(aes(factor(name), n, fill=value, group=value))
geom_col()
facet_wrap(.~ID, scales = "free_y")
theme(legend.position = "bottom")
guides(fill=guide_legend(nrow=2))


