Load libraries
library(tidyverse)
library(readxl)
library(DT)
library(Matrix)
library(patchwork)
library(gridExtra)
library(viridis)
library(RColorBrewer)
library(ComplexHeatmap)
library(circlize)
library(purrr)
library(broom)
#library(broom.mixed)
library(igraph)
library(ggpubr)
library(correlation)
library(ggrepel)
library(ggalluvial)
library(Seurat)
library(clusterProfiler)
library(umap) ## https://github.com/tkonopka/umap/blob/master/vignettes/umap.Rmd
library(tidygraph)
library(ggraph)
library(scales)
library(WGCNA)
library(mixOmics)
library(eulerr)
library(lmerTest)
#library(broom.mixed)
library(emmeans)
Load data
# Malaria Explore 1536 data
data <- utils::read.delim("../data/data/explore1536/20230411_infectious_olink.tsv")
# Malaria sampleTable
sampleTable_simple <- readRDS("../data/metaData_clean/Explore1536_Malaria_sampleTable_simple.rds")
# Malaria subjectTable
subjectTable <- readRDS("../data/metaData_clean/MalariaResource_subjectTable.rds")
# Malaria clinchem data
clinchem_study_pats_acute.wide <- readRDS("../data/metaData_clean/Explore1536_ClinicalChemistry_acute.rds")
# Tropical fever Explore 1536 data
TF.long <- readRDS("../data/data_clean/Explore1536_TF_tidy_long.rds")
# Tropical fever sampleTable
TF_sampleTable <- readRDS("../data/metaData_clean/Explore1536_TF_sampleTable.rds")
# HPA v24
hpa_24.0 <- read_tsv("../data/hpa/proteinatlas_v24.tsv") %>% janitor::clean_names() %>%
mutate(secretome_location = ifelse(is.na(secretome_location),"Not secreted",secretome_location))
# HPA tissue expression v23
hpa.tissue <- read_tsv("../data/hpa/rna_tissue_consensus.tsv") %>% janitor::clean_names()
# Load cleaned malaria data
#- assays with QC warn in more than 70% of all assays
#- samples with more than 70% below LOD
data.wide <- readRDS("../data/data_clean/Explore1536_tidy_wide.rds")
data.long <- readRDS("../data/data_clean/Explore1536_tidy_long.rds")
## Explore 1536 data set - MGH Covid-19 study, Filbin et al. 2021
covid_NPXdata <- read_delim("../data/MGH_OLINK_COVID/MGH_COVID_OLINK_NPX.txt",comment = "##")
mgh.covid.meta <- read_delim("../data/MGH_OLINK_COVID/MGH_COVID_Clinical_Info.txt", comment="##") %>% janitor::clean_names()
mgh.covid.meta.key <- read_excel("../data/MGH_OLINK_COVID/variable_descriptions.xlsx")
## load tropical fever cohort data
TF_SOFA <- readRDS("../data/metaData_clean/2021213_TF_DA_TROP_SOFAscores.rds") %>% filter(diagnose_clean!="P.falciparum")
TF.long <- readRDS("../data/data_clean/Explore1536_TF_tidy_long.rds")
## loding MIP Cohort FACS data - Lautenbach et al. Cell Reports 2022
FACs_data <- read_delim("../../MalariaTravellers/data/TravellerCohort_FACS_log2cpu_long.csv")
FACS_meta <- read_delim("../../MalariaTravellers/data/TravellerCohort_SubjectTable.csv")
Set theme & colors
theme_set(theme_minimal(base_size = 6))
time3_col <- c("Acute" = "#C51B7D",
"D10" = "#E9A3C9",
"M12" = "#4D9221")
sex2_col <- c(male = "#c5b8dc",
female = "#b9d2b1")
endemic2_col <- c(primary_infected = "#998EC3",
previously_exposed = "#F1A340")
severe_5_col = c("1"="tomato",
"0"="grey80")
secretome_location_cols <- c("Secreted to blood" = "#FB8072",
"Intracellular and membrane" = "#8DD3C7",
"Secreted in other tissues" = "#B3DE69",
"Secreted to extracellular matrix" = "#80B1D3",
"Secreted in brain" = "#b9d2b1",#"#FCCDE5",
"Secreted to digestive system" = "#FDB462",
"Secreted - unknown location" = "#FFFF00",
"Secreted in male reproductive system" = sex2_col[[1]],#"#BEBADA",
"Secreted in female reproductive system" = sex2_col[[2]],
"Not secreted" = "#D9D9D9")
secretome_location_tissue_spec_cols <- c(secretome_location_cols,
c("Not secreted - Tissue enriched" = "#88419d",
"Not secreted - Tissue enhanced" = "#8c96c6",
"Not secreted - Group enriched" = "#b3cde3",
"Not secreted - Low tissue specificity" = "#edf8fb")
)
SOFA_sub_col = colorRamp2(c(0,4), c("white","red"))
patient_kclust3 <- c('3' = "#92c5de", '2' = "#f4a582", '1' = "#ca0020")
patient_kclust3_lab <- c("mild"="#92c5de", "moderate"="#f4a582", "severe"="#ca0020")
patient_kclust3_lab_conv <- c("mild"="#92c5de", "moderate"="#f4a582", "severe"="#ca0020","convalescence" ="grey50")
SOFA_sub_col = colorRamp2(c(0,4), c("white","red"))
SOFA_total_col = colorRamp2(c(min(subjectTable$SOFA_total,na.rm = TRUE),
median(subjectTable$SOFA_total,na.rm = TRUE),
max(subjectTable$SOFA_total,na.rm = TRUE)),
c(brewer.pal(3,name="PuBu")))
## dimensinality reduction theme
my_dimred_theme <- theme_classic() +
theme(axis.text = element_blank(),
axis.ticks = element_blank(),
#text = element_text(size = 12),
#legend.text = element_text(size = 10),
legend.position = "right")
## a4 pdf theme
theme_a4_pdf <- theme(axis.text.x = element_text(size=6),
axis.text.y = element_text(size=6),
axis.title.x = element_text(size=6),
axis.title.y = element_text(size=6),
## legend
legend.key.size = unit(1, 'cm'), #change legend key size
legend.key.height = unit(0.25, 'cm'), #change legend key height
legend.key.width = unit(0.25, 'cm'), #change legend key width
legend.title = element_text(size=6), #change legend title font size
legend.text = element_text(size=6),
## label
plot.title = element_blank(),
plot.subtitle = element_blank(),
plot.caption = element_blank(),
## facet_grid
strip.text.x = element_text(size = 6,face="bold"),
#strip.text.y = element_text(size = 6),
strip.placement = "outside"
)
## patchwork panel a4 pdf theme
patchwork_panel_a4_pdf <- patchwork::plot_annotation(theme = theme(plot.title = element_text(size = 12),
plot.tag = element_text(size = 16,face = 'bold')
),
tag_levels = 'A')
asym_study_id <- c("2021004")
## 2013004 - Lib 1
## 2013007 - Lib 2
## 2013008 - Lib 3
## 2018002 - Lib 4
rhapsody_study_ids <- c("2013004","2013007","2013008","2018002")
#require(clusterProfiler)
#length(unique(data$UniProt)) ## 1463
mapping_uniprot_ensembl <- bitr(unique(data$UniProt),
fromType="UNIPROT",
toType=c("SYMBOL", "ENSEMBL","ENTREZID"),
OrgDb="org.Hs.eg.db") %>%
dplyr::rename(UniProt = UNIPROT,
Symbol = SYMBOL,
Ensembl = ENSEMBL,
Entrez = ENTREZID) %>%
inner_join(data %>% dplyr::select(Assay,UniProt) %>% dplyr::distinct(),by="UniProt")
#write_delim(mapping_uniprot_ensembl, "../../2022_Explore1536FarnertLab/data/Mapping_Explore_UniProt2Ensembl.txt")
Plasma proteomic perturbation during clinical malaria - Cohort characteristics
fig1.list <- list()
(fig1.list[["general_sex_age_dist"]] <- subjectTable %>%
ggplot(aes(x=age, fill=sex)) +
geom_density(alpha=.6) +
#theme_classic() +
theme(axis.text = element_text(size=6),
axis.title = element_text(size=6),
axis.ticks.x = element_blank()) +
scale_fill_manual(values=sex2_col) +
scale_color_manual(values=sex2_col) +
labs(fill="Sex",
x="age [years]",
y="density")
)
(fig1.list[["timepoint_sex_perc"]] <-sampleTable_simple %>%
inner_join(subjectTable,by="study_id") %>%
group_by(Time,sex) %>%
tally() %>%
group_by(Time) %>%
dplyr::mutate(percent=n/sum(n)) %>%
ggplot(aes(x=Time,y=n,fill=sex)) +
geom_bar(stat="identity", position ="fill") +
geom_text(aes(label=paste0(sprintf("%1.1f", percent*100),"%")),
position=position_fill(vjust=0.5), colour="white", size =1.5) +
scale_y_continuous(labels = scales::percent,expand = c(0,.01)) +
#theme_minimal() +
theme(legend.position = "top",
axis.ticks.x = element_blank()) +
scale_fill_manual(values=sex2_col) +
labs(fill=NULL,
x=NULL,
y="Percentage")
)
(fig1.list[["timepoint_exposure"]] <- sampleTable_simple %>%
inner_join(subjectTable,by="study_id") %>%
group_by(Time,endemic) %>%
tally() %>%
group_by(Time) %>%
dplyr::mutate(percent=n/sum(n)) %>%
mutate(endemic = factor(endemic, levels=c("primary_infected","previously_exposed"))) %>%
ggplot(aes(x=Time,y=n,fill=endemic)) +
geom_bar(stat="identity", position ="fill") +
geom_text(aes(label=paste0(sprintf("%1.1f", percent*100),"%")),
position=position_fill(vjust=0.5), colour="white", size =1.5) +
scale_y_continuous(labels = scales::percent,expand = c(0,.01)) +
#theme_minimal() +
theme(legend.position = "top",
axis.ticks.x = element_blank()) +
scale_fill_manual(values=endemic2_col,labels=c("primary_infected"="primary infected","previously_exposed"="previously exposed")) +
labs(fill=NULL,
x=NULL,
y="Percentage")
)
df <- data.wide %>%
inner_join(sampleTable_simple %>%
transmute(sample_id),
by="sample_id") %>%
column_to_rownames("sample_id")
## PC calculation
pcaRes <- stats::prcomp(df,center = TRUE, scale. = TRUE)
varExp <- round(pcaRes$sdev^2 / sum(pcaRes$sdev^2) * 100)
pcaDF <- data.frame(PC1 = pcaRes$x[, 1],
PC2 = pcaRes$x[, 2]) %>%
rownames_to_column("sample_id")
## Prep for plotting
data4plot <- pcaDF %>%
dplyr::inner_join(sampleTable_simple, by="sample_id")
(pca_fig1 <- data4plot %>%
ggplot(mapping = aes(x = PC1, y = PC2, color = Time,fill=NULL, label = NULL)) +
geom_point(alpha = 0.9, size = 1) +
ggplot2::scale_color_manual(values= time3_col) +
labs(x = paste0("PC1 (", varExp[1], " %)"),
y = paste0("PC2 (", varExp[2], " %)")) +
theme_minimal() +
theme(legend.title = element_text(size = 6),
legend.text = element_text(size = 6)))
## nest data
data_nested <- data.long %>%
inner_join(sampleTable_simple, by="sample_id") %>%
group_by(UniProt,Assay) %>%
nest()
## nest data
data_nested <- data.long %>%
inner_join(sampleTable_simple, by="sample_id") %>%
left_join(subjectTable %>% transmute(study_id,
exposure = factor(endemic, levels=c("primary_infected","previously_exposed"))),
by="study_id") %>%
group_by(UniProt,Assay) %>%
nest()
lme_res <- data_nested %>%
mutate(lme.res.simple = purrr::map(data, ~ lmerTest::lmer(NPX ~ Time + exposure + (1|study_id), REML = F,
control = lme4::lmerControl(check.conv.singular = "ignore"),
data = .x %>% dplyr::filter(Time!="D10"))),
lme.res.complex = purrr::map(data, ~ lmerTest::lmer(NPX ~ Time * exposure + (1|study_id), REML = F,
control = lme4::lmerControl(check.conv.singular = "ignore"),
data = .x %>% dplyr::filter(Time!="D10"))),
lme.simple.tidy = purrr::map(lme.res.simple, ~ broom.mixed::tidy(.,)),
lme.complex.tidy = purrr::map(lme.res.complex, ~ broom.mixed::tidy(.)),
posthoc.time = purrr::map(lme.res.simple, ~ summary(contrast(emmeans(., ~ Time), method = "pairwise")) %>% tibble()),
posthoc.time_exposure = purrr::map(lme.res.complex, ~ summary(contrast(emmeans(., ~ Time * exposure), method = "pairwise")) %>% tibble())
)
## compare simple (without interaction) with complex model (interaction)
bic_aic_res <- lme_res %>%
mutate(simple_glance = purrr::map(lme.res.simple, ~(broom::glance(.))),
complex_glance = purrr::map(lme.res.complex, ~(broom::glance(.)))) %>%
unnest(cols = c(simple_glance,complex_glance),names_sep = ".") %>%
dplyr::select(Assay, contains("AIC"),contains("BIC"))
df_better_model <- bic_aic_res %>%
pivot_longer(cols=c(-UniProt,-Assay)) %>%
separate(name, into=c("model","eval"),sep = "\\.",remove = T) %>%
pivot_wider(names_from = model, values_from = value) %>%
mutate(simple_better = simple_glance < complex_glance,
simple_delta = simple_glance-complex_glance,
better_model = case_when(abs(simple_delta)>6 ~ "complex",
.default = "simple"))
df_better_model %>%
group_by(eval) %>%
count(better_model) %>%
ggplot(aes(x=better_model, y=n)) +
geom_col() +
geom_text(aes(label=n),size=2,nudge_y = 50) +
facet_wrap(~eval)
df_better_model %>% filter(eval=="BIC",
better_model=="complex")
## # A tibble: 14 × 8
## # Groups: UniProt, Assay [14]
## UniProt Assay eval simple_glance complex_glance simple_better simple_delta
## <chr> <chr> <chr> <dbl> <dbl> <lgl> <dbl>
## 1 Q9Y275 TNFSF1… BIC 276. 267. FALSE 9.66
## 2 P10586 PTPRF BIC 153. 146. FALSE 7.56
## 3 P05107 ITGB2 BIC 148. 141. FALSE 6.98
## 4 Q9HAN9 NMNAT1 BIC 401. 394. FALSE 6.73
## 5 P50135 HNMT BIC 340. 332. FALSE 8.39
## 6 Q9UL46 PSME2 BIC 199. 192. FALSE 6.96
## 7 P04179 SOD2 BIC 328. 319. FALSE 9.55
## 8 P00352 ALDH1A1 BIC 302. 290. FALSE 11.6
## 9 Q9P0V8 SLAMF8 BIC 308. 301. FALSE 6.78
## 10 Q16772 GSTA3 BIC 350. 341. FALSE 8.69
## 11 Q86SF2 GALNT7 BIC 74.3 68.0 FALSE 6.33
## 12 Q96I15 SCLY BIC 280. 273. FALSE 6.67
## 13 Q00796 SORD BIC 401. 391. FALSE 10.3
## 14 Q7Z4W1 DCXR BIC 300. 289. FALSE 11.3
## # ℹ 1 more variable: better_model <chr>
lme_res_padj <- lme_res %>%
unnest(cols="posthoc.time") %>%
filter(contrast=="Acute - M12") %>%
ungroup() %>%
mutate(p.adj = p.adjust(p.value, method="fdr"),
FDR = ifelse(p.adj <= 0.01, TRUE,FALSE)) %>%
arrange(p.adj)
assay_better_complex_model <- df_better_model %>% filter(eval=="BIC",
better_model=="complex") %>% pull(Assay)
plot(euler(
list("acute_m12" = lme_res_padj %>% filter(FDR==T) %>% pull(Assay),
"bic_complex_better" = assay_better_complex_model)),
fills = c("#C51B7D",
"white"),
quantities = TRUE,
lty = 1,#1:3,
fontsize=2,
labels = list(fontsize=7),
shape = "ellipse",adjust_labels = T)
lme_res_padj %>%
transmute(Assay, estimate,p.adj) %>%
filter(Assay %in% assay_better_complex_model) %>%
arrange(-abs(estimate))
## # A tibble: 14 × 3
## Assay estimate p.adj
## <chr> <dbl> <dbl>
## 1 SLAMF8 2.48 4.64e-16
## 2 HNMT 1.42 2.76e- 7
## 3 NMNAT1 1.18 1.77e- 3
## 4 ALDH1A1 1.13 3.00e- 6
## 5 TNFSF13B 1.04 1.30e- 6
## 6 GSTA3 1.03 5.20e- 4
## 7 SOD2 0.920 5.69e- 4
## 8 SCLY 0.905 1.54e- 5
## 9 PSME2 0.814 6.65e- 8
## 10 DCXR 0.637 3.36e- 3
## 11 ITGB2 0.623 2.43e- 8
## 12 SORD 0.391 3.10e- 1
## 13 GALNT7 -0.0573 4.89e- 1
## 14 PTPRF -0.0129 9.28e- 1
dap.res <- lme_res_padj %>%
dplyr::rename(logFC = estimate) %>%
mutate(direction = ifelse(logFC<0,"down","up")) %>%
dplyr::select(-c(lme.res.simple,lme.res.complex,lme.complex.tidy))
(dap.acute.volcano <- dap.res %>%
ggplot(aes(x=logFC, y=-log10(p.adj), color=FDR)) +
geom_point(alpha=0.7,size=.5, shape=16) +
theme_minimal() +
ggrepel::geom_text_repel(data = . %>% filter(FDR ==TRUE, abs(logFC) >1.5),
aes(label = Assay), color="black",
force = 0.5,
direction = "both",
segment.size = 0.2,
segment.alpha=.1,
show.legend = F,
size=1,
max.overlaps = 16,
box.padding = unit(0.2, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'grey50') +
geom_vline(xintercept = c(-1, 1), linetype = "dotted", linewidth = .5) +
geom_hline(yintercept = -log10(0.01), linetype = "dotted", linewidth = .5) +
scale_x_continuous(breaks=c(-4.0,-3.0,-2.0,-1.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0),limits = c(-4,6)) +
geom_segment(aes(x = 1.1, y = 21, xend = 4, yend = 21), color=time3_col[[1]], #y=16.5
arrow = arrow(length = unit(0.2, "cm"))) +
annotate("text",x=2.6, y=22.5, size=2, label="High abundant\nin acute malaria") + #y=17.6
geom_segment(aes(x = -1.1, y = 21, xend = -4, yend = 21),color=time3_col[[1]],
arrow = arrow(length = unit(0.2, "cm"))) +
annotate("text",x=-2.6, y=22.5, size=2, label="Low abundant\nin acute malaria") + #y=17.1
labs(x="Estimated difference (NPX) at acute compared to convalescence",
y="-log10(adj. p-value)") +
theme(legend.position = "none",
text = element_text(size=6)) +
scale_color_manual(values= c(time3_col[[3]],time3_col[[1]])))
hm.input <- data.wide %>%
inner_join(sampleTable_simple %>% dplyr::select(DAid,study_id, sample_id, Time),by="sample_id") %>%
dplyr::filter(Time=="Acute")
top25 <- dap.res %>%
dplyr::filter(FDR==TRUE) %>%
arrange(desc(abs(logFC)),desc(p.adj)) %>%
mutate(up_down = ifelse(logFC>0,"up","down")) %>%
dplyr::group_by(up_down) %>%
slice_head(n=25)
top25.split <- top25 %>% column_to_rownames("Assay") %>%
transmute(direction = factor(direction, levels = c("up","down"), labels = c("high","low")))
norm.df <- hm.input %>% column_to_rownames("sample_id") %>%
dplyr::select(-c(DAid,Time,study_id)) %>%
dplyr::select(c(top25$Assay)) %>%
t() %>% scale()
## == ComplexHeatmap == ##
(acute.npx.top25.hm <- norm.df %>%
scale() %>%
Heatmap(name="scaled\nNPX",
clustering_distance_columns = "spearman",
clustering_method_columns="ward.D2",
top_annotation = HeatmapAnnotation(df = data.frame(sample_id = colnames(.)) %>%
separate(sample_id, into = c("study_id","Time"),sep="\\|") %>%
left_join(subjectTable %>%
transmute(study_id,
endemic = factor(case_when(endemic=="primary_infected"~"primary",
endemic=="previously_exposed"~"previously",
.default=NA),levels=c("primary","previously")),
severe_5 = ifelse(severe_5==1,"yes","no"))) %>%
dplyr::select(-study_id,-Time),
simple_anno_size = unit(2, "mm"),
show_annotation_name = F,
annotation_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
legend_height = unit(3, "mm"),
grid_width = unit(3, "mm")),
show_legend = T,
annotation_label = list(severe_5 = "Severe malaria\n(WHO)",
endemic = "exposure"),
col = list(endemic = c("previously"= "#F1A340","primary"= "#998EC3"),#endemic2_col,
severe_5 = c("yes"="tomato","no"="grey80")),#severe_5_col),
which="column"),
show_column_names = F,
column_names_gp = gpar(fontsize = 8),
show_column_dend = TRUE,
cluster_columns = TRUE,
column_dend_reorder = TRUE,
row_dend_reorder=1-rowSums(abs(norm.df)),
cluster_row_slices = FALSE,
row_dend_width = unit(0.5, "cm"),
column_dend_height = unit(0.5, "cm"),
raster_resize_mat = mean,
row_title_gp = gpar(fontsize=6),
show_row_names = TRUE,
row_split = top25.split,
row_gap = unit(0.05,"cm"),
row_names_gp = gpar(fontsize = 6),
heatmap_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
legend_height = unit(3, "mm"),
grid_width = unit(3, "mm")),
height = ncol(.)*unit(1.4, "mm"),
width = ncol(.)*unit(.14,"mm")
))
top10up <- dap.res %>% dplyr::filter(FDR==TRUE) %>%
arrange(desc(abs(logFC)),desc(p.adj)) %>%
mutate(up_down = ifelse(logFC>0,"up","down")) %>%
filter(up_down=="up") %>%
head(n=10) %>%
pull(Assay)
(violin_malaria_top10 <- data.long %>%
inner_join(sampleTable_simple %>% dplyr::select(DAid,Time,sample_id,study_id),
by="sample_id") %>%
dplyr::filter(Assay %in% c(top10up)) %>%
mutate(Assay = factor(Assay, levels = top10up)) %>%
ggplot(aes(x=Time, y=NPX, color=Time,fill=Time)) +
geom_line(aes(group=study_id), color="grey",alpha=.6,size=.2)+
geom_violin(trim = F,alpha=.2,lwd=.25) +
geom_boxplot(alpha=1,width=0.25,color="black",outlier.size = 0.5, fatten = 1,lwd=.25,show.legend = F) +
facet_wrap(~Assay,ncol = 5,scales = "free_y") +
theme_minimal() +
labs(x="") +
theme(axis.text.x = element_text(size=6),
legend.position = "none") +
scale_color_manual(values=time3_col) +
scale_fill_manual(values=time3_col))
top10down <- dap.res %>% dplyr::filter(FDR==TRUE) %>%
arrange(desc(abs(logFC)),desc(p.adj)) %>%
mutate(up_down = ifelse(logFC>0,"up","down")) %>%
filter(up_down=="down") %>%
head(n=10) %>%
pull(Assay)
(violin_malaria_top10_down <- data.long %>%
inner_join(sampleTable_simple %>% dplyr::select(DAid,Time,sample_id,study_id),
by="sample_id") %>%
dplyr::filter(Assay %in% c(top10down)) %>%
mutate(Assay = factor(Assay, levels = top10down)) %>%
ggplot(aes(x=Time, y=NPX, color=Time,fill=Time)) +
geom_line(aes(group=study_id), color="grey",alpha=.6,size=.2)+
geom_violin(trim = F,alpha=.2,lwd=.25) +
geom_boxplot(alpha=1,width=0.25,color="black",outlier.size = 0.5, fatten = 1,lwd=.25,show.legend = F) +
facet_wrap(~Assay,ncol = 5,scales = "free_y") +
theme_minimal() +
labs(x="") +
theme(axis.text.x = element_text(size=6),
legend.position = "none") +
scale_color_manual(values=time3_col) +
scale_fill_manual(values=time3_col))
##gtsummary
library(gtsummary)
subjectTable %>%
mutate(years_since_endemic = case_when(endemic=="primary"~NA,
.default = years_since_endemic),
SOFA_total = as.numeric(SOFA_total),
endemic = str_replace(endemic,"_"," "),
severe_5 = case_when(severe_5==1 ~ "severe",.default = "non-severe")) %>%
tbl_summary(include = c(sex, age, endemic, years_since_endemic, diff_acuteSample_spt_current.abs, inf_rbc_max, severe_5,SOFA_total),
statistic = list(all_continuous() ~ "{median} ({min}-{max})",
all_categorical() ~ "{n} / {N} ({p}%)"
),
# digits = all_continuous() ~ 2,
digits = c(age ~ 0,
years_since_endemic ~ 0,
diff_acuteSample_spt_current.abs ~ 0,
inf_rbc_max ~ 2),
label = c(endemic ~ "Previous malaria exposure",
age ~ "Age",
sex ~"Sex",
years_since_endemic ~ "Years since living in endemic area",
inf_rbc_max ~ "Parasitemia [%]",
SOFA_total ~ "SOFA scale",
diff_acuteSample_spt_current.abs ~ "Days since symptom onset",
severe_5 = "Severe malaria according to WHO criteria\n(ref WHO Guidelines for the treatment of Malaria , 3rd edition, 2015)"),
missing = "no"
) %>%
add_n() %>% # add column with total number of non-missing observations
modify_header(label = "**Variable**") %>% # update the column header
bold_labels()
Variable | N | N = 721 |
---|---|---|
Sex | 72 | |
    female | 17 / 72 (24%) | |
    male | 55 / 72 (76%) | |
Age | 72 | 39 (20-63) |
Previous malaria exposure | 72 | |
    previously exposed | 48 / 72 (67%) | |
    primary infected | 24 / 72 (33%) | |
Years since living in endemic area | 47 | 10 (0-33) |
Days since symptom onset | 69 | 4 (0-27) |
Parasitemia [%] | 72 | 0.80 (0.01-8.00) |
Severe malaria according to WHO criteria (ref WHO Guidelines for the treatment of Malaria , 3rd edition, 2015) | 72 | |
    non-severe | 62 / 72 (86%) | |
    severe | 10 / 72 (14%) | |
SOFA scale | 72 | |
    0 | 1 / 72 (1.4%) | |
    1 | 7 / 72 (9.7%) | |
    2 | 19 / 72 (26%) | |
    3 | 16 / 72 (22%) | |
    4 | 13 / 72 (18%) | |
    5 | 8 / 72 (11%) | |
    6 | 5 / 72 (6.9%) | |
    9 | 2 / 72 (2.8%) | |
    12 | 1 / 72 (1.4%) | |
1 n / N (%); Median (Minimum-Maximum) |
daps.out <- lme_res_padj %>%
transmute(UniProt,
Assay,
estimate,
contrast,
SE,
CI = 1.96*SE,
df,
t.ratio,
p.value,
p.adj,
preferred_model = case_when(Assay %in% assay_better_complex_model ~ "complex",
.default = "simple"))
#daps.out%>% write_tsv(paste0(result.dir,"Supplementary_TableS2_DifferentiallyAbundantProteins.tsv"))
daps.out #%>% head()
## # A tibble: 1,427 × 11
## UniProt Assay estimate contrast SE CI df t.ratio p.value p.adj
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 P28908 TNFRSF8 2.95 Acute -… 0.194 0.379 66.3 15.2 2.34e-23 3.33e-20
## 2 P22301 IL10 6.62 Acute -… 0.455 0.892 66.3 14.5 2.65e-22 1.38e-19
## 3 Q07325 CXCL9 4.33 Acute -… 0.293 0.574 62.9 14.8 3.86e-22 1.38e-19
## 4 P04233 CD74 2.20 Acute -… 0.134 0.263 52.0 16.4 3.41e-22 1.38e-19
## 5 P20333 TNFRSF… 2.66 Acute -… 0.185 0.363 66.3 14.4 5.00e-22 1.43e-19
## 6 P19320 VCAM1 1.48 Acute -… 0.105 0.207 66.3 14.0 1.78e-21 4.22e-19
## 7 P14555 PLA2G2A 4.41 Acute -… 0.243 0.476 41.1 18.2 2.91e-21 5.94e-19
## 8 O95998 IL18BP 2.41 Acute -… 0.177 0.346 66.3 13.6 7.29e-21 1.30e-18
## 9 P09603 CSF1 1.66 Acute -… 0.123 0.242 66.3 13.4 1.37e-20 2.18e-18
## 10 P15291 B4GALT1 1.65 Acute -… 0.119 0.233 59.9 13.9 2.12e-20 2.52e-18
## # ℹ 1,417 more rows
## # ℹ 1 more variable: preferred_model <chr>
daps.out %>% filter(p.adj <=0.01, preferred_model=="simple", estimate<=-1)
## # A tibble: 16 × 11
## UniProt Assay estimate contrast SE CI df t.ratio p.value p.adj
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Q4KMG0 CDON -1.32 Acute -… 0.0779 0.153 38.1 -17.0 2.36e-19 2.11e-17
## 2 Q9NQ79 CRTAC1 -1.07 Acute -… 0.104 0.204 45.1 -10.3 2.08e-13 3.70e-12
## 3 P21583 KITLG -1.77 Acute -… 0.187 0.366 54.7 -9.44 4.38e-13 7.11e-12
## 4 O14788 TNFSF… -2.34 Acute -… 0.240 0.470 47.9 -9.75 5.97e-13 9.57e-12
## 5 Q9UBX1 CTSF -1.27 Acute -… 0.137 0.269 48.7 -9.22 2.96e-12 3.87e-11
## 6 O60911 CTSV -1.35 Acute -… 0.163 0.319 66.3 -8.27 8.50e-12 9.86e-11
## 7 P23280 CA6 -1.90 Acute -… 0.223 0.438 47.7 -8.51 4.02e-11 4.09e-10
## 8 Q8TDL5 BPIFB1 -2.18 Acute -… 0.288 0.564 49.6 -7.59 7.57e-10 5.58e- 9
## 9 Q8WTU2 SSC4D -1.75 Acute -… 0.232 0.454 41.0 -7.55 2.84e- 9 1.78e- 8
## 10 O14793 MSTN -1.12 Acute -… 0.151 0.296 41.4 -7.40 4.31e- 9 2.56e- 8
## 11 Q15166 PON3 -1.07 Acute -… 0.162 0.318 46.1 -6.62 3.39e- 8 1.75e- 7
## 12 Q9UNE0 EDAR -1.46 Acute -… 0.226 0.444 51.2 -6.44 4.12e- 8 2.09e- 7
## 13 P06850 CRH -1.07 Acute -… 0.205 0.401 48.5 -5.25 3.36e- 6 1.19e- 5
## 14 P22466 GAL -1.08 Acute -… 0.230 0.450 47.5 -4.68 2.38e- 5 7.31e- 5
## 15 Q92583 CCL17 -1.28 Acute -… 0.323 0.633 66.3 -3.97 1.81e- 4 4.78e- 4
## 16 Q9HCM2 PLXNA4 -1.33 Acute -… 0.352 0.690 66.3 -3.79 3.27e- 4 8.20e- 4
## # ℹ 1 more variable: preferred_model <chr>
comp## Supplementary Figure 1 ### Figure S1A
tmp <- data.long %>%
distinct(sample_id)
hm_mat <- tmp %>%
separate(sample_id, into = c("study_id","Time"),sep = "\\|",remove = T) %>%
mutate(Time = factor(Time, levels=c("Acute","D10","M12")),
dummy = Time) %>%
group_by(study_id) %>%
mutate(n = n()) %>%
arrange(-n) %>%
ungroup() %>%
pivot_wider(names_from = Time, values_from = dummy) %>%
dplyr::select(-n) %>%
column_to_rownames("study_id") %>%
relocate(Acute,D10,M12) %>%
t()
(sample_overlap_hm <- hm_mat %>%
Heatmap(row_names_gp = gpar(fontsize=6),
show_column_names = F,
na_col = "white",
column_title_gp = gpar(fontsize=6),
row_names_side = "left",
column_names_gp = gpar(fontsize=6),
column_names_rot = 45,
show_heatmap_legend = F,
column_split = data.frame(study_id = colnames(hm_mat)) %>%
left_join(subjectTable %>%
transmute(study_id, endemic) %>%
bind_rows(
tibble(study_id = c("2014003","2012PT12"),
endemic = c("primary_infected","primary_infected")))) %>%
transmute(endemic= factor(endemic, levels=c("primary_infected","previously_exposed"), labels= c("primary infected","previously exposed"))),
row_title_gp = gpar(fontsize=6),
rect_gp = gpar(col = "white", lwd = .5),
width = ncol(.)*unit(1, "mm"),
height = nrow(.)*unit(2, "mm"),
#height = ncol(.)*unit(1.4, "mm"),
# width = ncol(.)*unit(.5,"mm"),
border_gp = gpar(col = "black", lty = .9),
col = time3_col,
top_annotation = HeatmapAnnotation(df = data.frame(study_id = colnames(.)) %>%
left_join(subjectTable %>%
transmute(study_id, endemic) %>%
bind_rows(
tibble(study_id = c("2014003","2012PT12"),
endemic = c("primary_infected","primary_infected"))
)) %>%
dplyr::select(-study_id),
simple_anno_size = unit(3, "mm"),
show_annotation_name = F,
show_legend = F,
col = list(endemic = endemic2_col),
which="column"))
)
(acute_exposure_volcano <- lme_res %>%
unnest(cols=posthoc.time_exposure) %>%
filter(contrast %in%c("Acute primary_infected - M12 primary_infected",
"Acute previously_exposed - M12 previously_exposed")) %>%
ungroup() %>%
group_by(contrast) %>%
mutate(p.adj = p.adjust(p.value, method="fdr"),
FDR = ifelse(p.adj <= 0.01, TRUE,FALSE)) %>%
arrange(p.adj) %>%
transmute(Assay,contrast, estimate,SE,df,t.ratio, p.value, p.adj, FDR,
color = case_when(FDR==T & contrast=="Acute primary_infected - M12 primary_infected" ~ "primary_infected",
FDR==T & contrast=="Acute previously_exposed - M12 previously_exposed" ~ "previously_exposed",
.default = NA
),
contrast = factor(contrast, levels=c("Acute primary_infected - M12 primary_infected",
"Acute previously_exposed - M12 previously_exposed")),
label_4_complex_better = case_when(Assay %in% assay_better_complex_model & contrast=="Acute primary_infected - M12 primary_infected" ~ Assay,
.default = NA)) %>%
ggplot(aes(y=fct_reorder(Assay, estimate), x=estimate, color=color)) +
scale_color_manual(values=endemic2_col, na.value = "grey",breaks = c("primary_infected","previously_exposed")) +
labs(color=NULL,
x="Estimated difference +- 95% CI at acute for\nprimary infected and previously exposed individuals\ncompared to M12",#"Estimated difference (NPX) at acute compared to healthy-state at M12",
y="ranked proteins") +
geom_errorbar(aes(xmin=estimate - 1.96*SE,
xmax=estimate + 1.96*SE),
linewidth=.2, # Thinner lines
width=.2,
alpha=.1) +
geom_point(alpha=0.7,size=.5, shape=16) +
geom_text_repel(aes(label=label_4_complex_better),
show.legend = F,
color="black",
vjust = .5,
hjust = 1,
nudge_x = .75,
direction = "y",
size=1,
#label.size = .1,
segment.size = 0.2,
segment.alpha=.1,
max.overlaps = 16) +
geom_vline(xintercept = 0, lty=2, alpha=.6) +
theme_minimal())
lme_res_expo <- lme_res %>%
unnest(cols="posthoc.time_exposure") %>%
filter(contrast=="Acute primary_infected - Acute previously_exposed") %>%
ungroup() %>%
mutate(p.adj = p.adjust(p.value, method="fdr"),
FDR = ifelse(p.adj <= 0.01, TRUE,FALSE)) %>%
filter(FDR==T) %>%
arrange(-estimate)
assays_significant_different_at_acute_exp <- lme_res_expo %>%
pull(Assay)
library(eulerr)
plot(euler(list("acute" = lme_res_padj %>% filter(FDR==T) %>% pull(Assay),
"exposure"= lme_res_expo %>% filter(FDR==T) %>% pull(Assay))),# assays_significant_different_at_acute_exp)),#lme_res_expo %>% filter(FDR==T) %>% pull(Assay))),
fills = c("#C51B7D",
"white"),
quantities = TRUE,
lty = 1,#1:3,
fontsize=1,
labels = list(fontsize=5),
shape = "ellipse",adjust_labels = T)
acut_exposure_intersect <- intersect(assays_significant_different_at_acute_exp,#lme_res_expo %>% filter(FDR==T) %>% pull(Assay),
lme_res_padj %>% filter(FDR==T) %>% pull(Assay))
df <- lme_res %>%
filter(Assay %in% assays_significant_different_at_acute_exp) %>%
mutate(Assay = factor(Assay, levels=assays_significant_different_at_acute_exp)) %>%
unnest(cols=posthoc.time_exposure) %>%
filter(contrast %in%c("Acute primary_infected - M12 primary_infected",
"Acute previously_exposed - M12 previously_exposed")) %>%
ungroup() %>%
mutate(color = case_when(contrast=="Acute primary_infected - M12 primary_infected" ~ "primary_infected",
contrast=="Acute previously_exposed - M12 previously_exposed" ~ "previously_exposed",
.default = NA
)) %>%
# rownames_to_column("rownumbers") %>%
#filter(Assay%in%c("CXCL10","IFNG","CXCL9","TNFSF13B")) %>%
dplyr::select(-(lme.res.simple:posthoc.time))
require(ggtext)
(acute_exposure_significant <- df %>%
mutate(x.label = paste("<span style = 'color: ",
ifelse(Assay %in% acut_exposure_intersect , "pink", "black"),
";'>",
Assay,
"</span>", sep = ""),
x.label = fct_reorder(x.label, as.character(Assay))) %>%
ggplot(aes(x=x.label, y=estimate, color=color)) +
geom_point(shape=16,size=.5) +
scale_color_manual(values = endemic2_col, breaks = c("primary_infected","previously_exposed")) +
geom_hline(yintercept = 0, lty=2, alpha=.3) +
# ggrepel::geom_text_repel(show.legend = F, color="black") +
geom_errorbar(aes(ymin=estimate - 1.96*SE,
ymax=estimate + 1.96*SE),
linewidth=.2, # Thinner lines
width=.2,
alpha=.5) +
labs(x=NULL,
color = NULL,
y="Estimated difference +- 95% CI at acute for\nprimary infected and previously exposed individuals\ncompared to M12\n") +
theme(axis.text.x = element_markdown(angle = 90, hjust = 1,vjust=0.5, size=6),
legend.position = "top")
)
related to main Figure 2
supplementary_covariates.res <- data.long %>%
inner_join(sampleTable_simple, by="sample_id") %>%
inner_join(subjectTable, by="study_id") %>%
filter(Time != "D10") %>%
mutate(Time = factor(Time, levels=c("Acute","M12"))) %>%
group_by(Assay) %>%
nest() %>%
mutate(lme.res = purrr::map(data, ~ lmerTest::lmer(NPX ~ Time + year_inclusion + sex + age + endemic + inf_rbc_max + (1|study_id), REML = F,
data = .)),
lme.tidy = purrr::map(lme.res, ~ broom.mixed::tidy(.)))
supplementary_covariates.res_ <- supplementary_covariates.res %>%
unnest(cols = lme.tidy) %>%
filter(effect =="fixed",
term!="(Intercept)") %>%
#filter(term != "Residuals") %>%
mutate(term = case_when(term=="sexmale"~"sex",
term=="endemicprimary_infected"~"endemic",
term=="TimeM12"~"Time",
.default = term)) %>%
group_by(term) %>%
mutate(p.adj = p.adjust(p.value, method="fdr")
) %>%
mutate(term.col = case_when(p.adj > 0.01 ~ NA,
p.adj <= 0.01 ~ term))
cov.colors <- c("Time" = time3_col[[1]],setNames(brewer.pal(7,"Dark2")[c(1:3,5:8)], c("sex","endemic","age","year_inclusion","inf_rbc_max")))
counts.fdr <- supplementary_covariates.res_ %>%
filter(p.adj <= 0.01) %>%
group_by(term) %>%
count(sort = T)
(data.aov.plot <- supplementary_covariates.res_ %>%
mutate(term = factor(term, levels=counts.fdr$term)) %>%
ggplot(aes(x=term, y= -log10(p.adj))) +
geom_jitter(aes(color=term.col), show.legend = F,size=.25,alpha=.7,shape=16) +
ggrepel::geom_text_repel(data= . %>% group_by(term) %>% slice_max(n=5,order_by = -log10(p.adj)),
aes(label=Assay),
show.legend = F,force = .5, nudge_y = .25,
segment.size=0.2,
segment.alpha=.1,
size=1,
max.overlaps = 15, color="gray45") +
geom_hline(yintercept=-log10(0.01),
linetype = 3) +
scale_color_manual(values = cov.colors) +
geom_text(data=counts.fdr,aes(x=term, y=-1.2, label=n, color=term), show.legend = F) +
scale_x_discrete(labels=c("age" = "Age",
"year_inclusion" = "Year\nof\nsampling",
"sex" = "Sex",
"endemic" = "Previous\nexposure",
"inf_rbc_max" = "Parasitemia",
"Time" = "Infection\n(Acute vs convalescence)")) +
theme_minimal() +
labs(x=NULL,
color=NULL) +
theme())
n2show <- 3
(anova.sex.plot <- supplementary_covariates.res_ %>%
filter(term=="sex") %>%
arrange(p.adj) %>%
head(n=n2show) %>%
unnest(cols = data) %>%
ggplot(aes(x=Time, y=NPX, fill=as.character(sex))) +
geom_boxplot( fatten = 1,lwd=.25,outlier.size = 0.5) +
facet_wrap(~Assay, scales = "free_y") +
theme_minimal() +
labs(x=NULL,
fill="Sex") +
scale_fill_manual(values = sex2_col))
(anova.endemic.plot <- supplementary_covariates.res_ %>%
filter(term=="endemic") %>%
arrange(p.adj) %>%
head(n=n2show) %>%
unnest(cols = data) %>%
ggplot(aes(x=Time, y=NPX, fill=as.character(endemic))) +
geom_boxplot(fatten = 1,lwd=.25,outlier.size = 0.5) +
facet_wrap(~Assay, scales = "free_y") +
theme_minimal() +
labs(x=NULL,
fill="Previous exposure") +
scale_fill_manual(values = endemic2_col))
(anova.year_inclusion.plot <- supplementary_covariates.res_ %>%
filter(term=="year_inclusion") %>% arrange(p.adj) %>%
head(n=n2show) %>%
unnest(cols = data) %>%
ggplot(aes(x=as.numeric(year_inclusion),y=NPX,color=Time)) +
geom_point(size=.5) +
geom_smooth(linewidth=0.4,
show.legend = F) +
scale_x_continuous(limits = c(2011,2021),breaks = c(2011,2016,2021)) +
facet_wrap(~Assay, scales = "free_y") +
theme_minimal() +
labs(x="Year of inclusion",
color="Sample time point") +
scale_color_manual(values = time3_col))
(anova.age.plot <- supplementary_covariates.res_ %>%
filter(term=="age") %>% arrange(p.adj) %>%
head(n=n2show) %>%
unnest(cols = data) %>%
ggplot(aes(x=as.numeric(age),y=NPX,color=Time)) +
geom_point(size=.5) +
geom_smooth(linewidth=0.4,
show.legend = F) +
facet_wrap(~Assay, scales = "free_y") +
theme_minimal() +
labs(title="",
x="Age (years)",
color="Sample time point") +
scale_color_manual(values = time3_col)
)
(anova.inf_rbc_max.plot <- supplementary_covariates.res_ %>%
filter(term=="inf_rbc_max") %>% arrange(p.adj) %>%
head(n=n2show) %>%
unnest(cols = data) %>%
filter(Time=="Acute") %>%
ggplot(aes(x=as.numeric(inf_rbc_max),y=NPX,color=as.numeric(inf_rbc_max))) +
geom_point(size=.5) +
geom_smooth(aes(color=..x..),
linewidth=0.4,
show.legend = F) +
facet_wrap(~Assay, scales = "free_y") +
theme_minimal() +
labs(title="",
x="Parasitemia, infected erythrocytes [%]",
color="Parasitemia [%]") +
scale_color_gradient(low="grey",high="darkred")
#scale_color_manual(values = time3_col)
)
(anova.panel <- (anova.inf_rbc_max.plot /
anova.endemic.plot /
anova.year_inclusion.plot /
anova.sex.plot/
anova.age.plot))
lme_res.d10 <- data_nested %>%
mutate(lme.res = purrr::map(data, ~ lmerTest::lmer(NPX ~ Time + exposure + (1|study_id), REML = F,
control = lme4::lmerControl(check.conv.singular = "ignore"),
data = .x %>% dplyr::filter(Time!="Acute"))),
posthoc.time = purrr::map(lme.res, ~ summary(contrast(emmeans(., ~ Time), method = "pairwise")) %>% tibble())
)
lme_res.d10_padj <- lme_res.d10 %>%
unnest(cols="posthoc.time") %>%
filter(contrast=="D10 - M12") %>%
ungroup() %>%
mutate(p.adj = p.adjust(p.value, method="fdr"),
FDR = ifelse(p.adj <= 0.01, TRUE,FALSE)) %>%
arrange(p.adj)
acut_d10_list <- list("Acute"=c(lme_res_padj %>% filter(FDR==TRUE, estimate>1) %>% pull(Assay)),
"D10" = c(lme_res.d10_padj %>% filter(FDR==TRUE, estimate>1) %>% pull(Assay)))
## venn plot with overlapp numbers
(venn.DAP.acute.d10 <- ggvenn::ggvenn(acut_d10_list,
show_percentage = F,
fill_color = as.character(time3_col[c(1,2)]),
stroke_size = 0.5,
set_name_size = 2,
text_size = 2,
auto_scale = F,
show_elements = F) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
text = element_text(size=6),
strip.text = element_text(size=6),
plot.tag = element_text(size=6)))
(d10_malaria_volcano <- lme_res.d10_padj %>%
arrange(p.adj, abs(estimate)) %>%
ggplot(aes(x=estimate, y=-log10(p.adj), color=FDR)) +
geom_point(alpha=0.7,size=.5, shape=16) +
ggrepel::geom_text_repel(data = . %>% filter(FDR ==TRUE, abs(estimate) >1),
aes(label = Assay), color="black",
force = 0.5,
direction = "both",
segment.size = 0.2,
segment.alpha=.1,
show.legend = F,
size=1.5,
max.overlaps = 16,
box.padding = unit(0.2, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'grey50'
) +
theme_minimal() +
geom_vline(xintercept = c(-1, 1), linetype = "dotted", size = .5) +
geom_hline(yintercept = -log10(0.01), linetype = "dotted", size = .5) +
scale_x_continuous(breaks=c(-2.5,-1.0,0.0,1.0,2.5,5.0),limits = c(-2.5,5)) +
labs(x="Estimated difference (NPX)",
y="-log10(adj. p-value)",
subtitle= "D10 after disease vs. convalescence",
caption=paste0("DAP: ",lme_res.d10_padj %>% filter(FDR==TRUE) %>% nrow(),"\n",
"DAP up: ",lme_res.d10_padj %>% filter(FDR==TRUE,estimate>0) %>% nrow(),"\n",
"DAP FC>1: ",lme_res.d10_padj %>% filter(FDR==TRUE,estimate>1) %>% nrow(),"\n")) +
theme(legend.position = "none",
text = element_text(size=6)) +
scale_color_manual(values= c(time3_col[[3]],time3_col[[2]])))
(acute_d10_comp <- dap.res %>%
ungroup() %>%
arrange(-logFC) %>%
mutate(row_id=row_number()) %>%
mutate(Assay_orders = factor(as.factor(row_id), levels = row_id, labels = Assay),
Assay_orders = row_id) %>%
left_join(lme_res.d10_padj, by=c("Assay","UniProt"),suffix = c(".acute",".d10")) %>%
mutate(d = ifelse(estimate>logFC,T,F),
d_dbl = abs(logFC-estimate)) %>%
ggplot(aes(x=Assay_orders)) +
geom_segment(data = . %>% filter(p.adj.d10<=0.01, estimate >1),
aes(group=Assay, x = Assay_orders, xend = Assay_orders,yend = logFC, y=estimate, color=d), lwd=0.1) +
geom_point(aes(y=logFC), size=.05, alpha=1, color=time3_col[[1]]) +
geom_point(data = . %>% filter(p.adj.d10 <=0.05), aes(y=estimate),color=time3_col[[2]], size=.05,alpha=1) +
ggrepel::geom_text_repel(data = . %>% filter(p.adj.d10<=0.05, estimate >1) %>%
filter(d==TRUE) %>%
slice_max(order_by = d_dbl,n=10),
aes(label = Assay,y=estimate, color=d),
force = 0.5,
direction = "both",
segment.size = 0.1,
min.segment.length = 1,
nudge_x = 1,
show.legend = F,
size=2,
max.overlaps = 16,
box.padding = unit(0.1, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'black') +
scale_color_manual(values=c("FALSE" = "navy","TRUE"="red"), labels=c("lower at D10","higher at D10")) +
geom_hline(yintercept = 0,linetype=3, color=time3_col[[3]]) +
scale_x_continuous(expand=c(.1,0),
trans = "sqrt") +
labs(color = NULL,
x="Proteins ranked by estimated difference (NPX)\nat acute malaria",
y="Estimated difference (NPX)") +
theme_minimal() +
theme(text = element_text(size=6))
)
related to main Figure 1
require(clusterProfiler)
length(unique(data$UniProt)) ## 1463
## [1] 1463
entrez_uniprot_name_mapping <- clusterProfiler::bitr(unique(data.long$UniProt),
fromType="UNIPROT",
toType=c("SYMBOL","ENTREZID"),
OrgDb="org.Hs.eg.db") %>%
dplyr::rename(UniProt = UNIPROT,
Symbol = SYMBOL,
Entrez = ENTREZID)
ranks_entrez <- entrez_uniprot_name_mapping %>%
inner_join(dap.res %>% ungroup() %>% filter(p.adj<=0.01), by="UniProt") %>%
arrange(-logFC) %>%
dplyr::select(Entrez, logFC) %>% deframe()
### KEGG
## all explore proteins
universe.proteins <- data.long %>% distinct(Assay,UniProt) %>% inner_join(entrez_uniprot_name_mapping,by="UniProt")
## prep enrich input
sig_proteins_df <- entrez_uniprot_name_mapping %>%
inner_join(dap.res %>% ungroup(), by="UniProt") %>% filter(p.adj <= 0.01)
# From significant results, we want to filter on log2fold change
sig_proteins <- sig_proteins_df$logFC
# Name the vector
names(sig_proteins) <- sig_proteins_df$Entrez
# omit NA values
sig_proteins <- na.omit(sig_proteins)
# filter on min log2fold change (log2FoldChange > 1)
sig_proteins <- names(sig_proteins)[abs(sig_proteins) > 1]
cp_KEGG.res <- enrichKEGG(
sig_proteins,
organism = "hsa",
#keyType = "UNIPROT",
pvalueCutoff = 1,
pAdjustMethod = "BH",
universe = universe.proteins$Entrez,
minGSSize = 10,
maxGSSize = 500,
qvalueCutoff = 1,
use_internal_data = F
)
#data.frame(cp_KEGG.res)
(cp.kegg.acutefc1 <- data.frame(cp_KEGG.res) %>%
separate(GeneRatio, into=c("hit","total"),sep="/",remove = F,convert=TRUE) %>%
head(10) %>%
mutate(ratio = hit/total) %>%
ggplot(aes(x=fct_reorder(Description, -ratio,.desc = TRUE), y=ratio)) +
geom_bar(stat = "identity", width = 0.05) +
geom_point(aes(color=-log10(p.adjust))) +#size = 3) +
geom_text(aes(label=hit),size=2, nudge_y = .01)+
scale_y_continuous(expand = c(0.02,0), trans = "pseudo_log") +
scale_x_discrete(expand = c(-0.01, 1)) +
theme_minimal() +
theme(text = element_text(size=6 ),
axis.text.y = element_text(size = 6),
axis.ticks.x = element_blank()) +
coord_flip() +
guides(size = guide_legend(reverse=TRUE)) +
labs(title = "KEGG_2021_Human",
x= NULL,
y = "ratio [protein/total]",
size="Protein\noverlapp",
color=expression("-Log"[10]*"(p.adj)"))
)
require(pathview)
sig_proteins_df <- entrez_uniprot_name_mapping %>%
inner_join(dap.res %>% ungroup(), by="UniProt") %>% filter(p.adj <= 0.01, abs(logFC)>1)
logFC <- sig_proteins_df$logFC
names(logFC) <- sig_proteins_df$Entrez
pv.out <- pathview(gene.data = logFC,
pathway.id = "hsa04060",
species = "hsa",
limit = list(gene=5, cpd=1),
)
knitr::include_graphics("hsa04060.pathview.png")
KEGG
Potential sources and functionalities of plasma proteins during acute malaria
secretome_location_dap <- dap.res %>%
dplyr::filter(FDR==TRUE) %>%
arrange(desc(abs(logFC)),desc(p.adj)) %>%
inner_join(hpa_24.0, by=c("Assay"="gene","UniProt"="uniprot")) %>%
mutate(secretome_location_tissue_spec = case_when(secretome_location=="Not secreted"~ paste0(secretome_location," - ",rna_tissue_specificity),
.default = secretome_location)) %>%
group_by(secretome_location_tissue_spec) %>%
count(sort = TRUE)
## change order
secretome_location_dap.order <- secretome_location_dap %>% pull(secretome_location_tissue_spec)
secretome_location_dap.order <- c("Secreted to blood","Intracellular and membrane","Secreted in other tissues","Secreted to extracellular matrix",
"Secreted to digestive system", "Secreted in brain", "Secreted - unknown location", "Secreted in female reproductive system",
"Secreted in male reproductive system",
"Not secreted - Tissue enriched", "Not secreted - Tissue enhanced","Not secreted - Group enriched", "Not secreted - Low tissue specificity")
## plot everything
(hpa.protein.origin.overview <- secretome_location_dap %>%
ungroup() %>%
mutate(secretome_location_tissue_spec = factor(as.factor(secretome_location_tissue_spec), levels=rev(secretome_location_dap.order))) %>%
ggplot(aes(x=secretome_location_tissue_spec,y=n,fill=secretome_location_tissue_spec)) +
geom_col(width = 0.5) +
geom_text(aes(label=n),size=2, nudge_y = -.2) +
coord_flip() +
scale_y_continuous(trans="pseudo_log",name = NULL, sec.axis = sec_axis(~.,labels = NULL,breaks = NULL, name = "Number of DAPs"),
#expand=c(0,.15)
expand=c(0,0)
) +
theme_bw() +
theme(axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
legend.text=element_text(size=6),
legend.title=element_text(size=6),
plot.title = element_text(size=6))+
scale_fill_manual(values=secretome_location_tissue_spec_cols,
limits = secretome_location_dap.order) +
labs(fill="Protein\norigin\nby HPA",
x=NULL))
temp.df <- dap.res %>%
dplyr::filter(FDR==TRUE) %>%
arrange(desc(abs(logFC)),desc(p.adj)) %>%
inner_join(hpa_24.0, by=c("Assay"="gene","UniProt"="uniprot")) %>%
mutate(secretome_location_tissue_spec = case_when(secretome_location=="Not secreted"~ paste0(secretome_location," - ",rna_tissue_specificity),
.default = secretome_location))
df1 <- temp.df %>%
transmute(Assay, logFC, p.adj, direction,secretome_location_tissue_spec, secretome_function)
df2 <- df1 %>%
group_by(secretome_location_tissue_spec) %>%
summarise(atlas_name_count = n()) %>%
left_join(
df1 %>%
group_by(secretome_location_tissue_spec, secretome_function, direction) %>%
summarise(function_name_count = n()),
by="secretome_location_tissue_spec") %>%
left_join(
df1 %>% group_by(secretome_location_tissue_spec, secretome_function, direction) %>%
summarise(median_logFC = median(logFC)),
by=c("secretome_location_tissue_spec", "secretome_function","direction"))
(hpa.function.bubbleplot <- df2 %>%
filter(!secretome_location_tissue_spec %in% c("NULL", "NA","no mapping"),
!secretome_function %in% c("NULL")) %>%
mutate(secretome_function = case_when(is.na(secretome_function) ~ "No secretome function",
.default = secretome_function)) %>%
mutate(secretome_location_tissue_spec = factor(as.factor(secretome_location_tissue_spec),
levels=rev(secretome_location_dap.order))) %>%
ggplot(aes(x=median_logFC,
y= fct_reorder2(secretome_function,
atlas_name_count,
function_name_count,.desc = F))) +
geom_point(aes(size=function_name_count, color=secretome_location_tissue_spec), show.legend = T) +
geom_vline(xintercept = 0,linetype=1) +
geom_text(aes(label = function_name_count),
size=2, color="grey20",show.legend = F, parse = F) +
labs(x="median estimated difference (NPX)",
y=NULL,
title = "Number DAPs per HPA function",
size="Number of proteins",
caption="Size: number of proteins",
color = "HPA source") +
scale_color_manual(values=secretome_location_tissue_spec_cols, limits = secretome_location_dap.order) +
scale_x_continuous(trans = "pseudo_log") +
scale_y_discrete(expand = c(0,1))+
guides(size = "none") +
theme_minimal() +
scale_size(range=c(3,6)) +
theme(text = element_text(size=6))
)
(acute_malaria_hpa_source <- temp.df %>%
right_join(top25 %>% transmute(Assay,logFC)) %>%
ggplot(aes(x=fct_reorder(Assay,logFC), y=logFC, color=secretome_location_tissue_spec)) +
geom_point(show.legend = TRUE,size=1) +
geom_col(width = .05,show.legend = F) +
scale_y_continuous(sec.axis = sec_axis(~.,labels = NULL,breaks = NULL, name = "Top25 DAP")) +
coord_flip() +
theme_minimal() +
theme(plot.title.position = "plot",
axis.text.y = element_text(size = 4),
axis.text.x = element_text(size = 6),
axis.title.x = element_text(size = 6),
panel.grid.major = element_blank()) +
labs(color="HPA source",
x="",
y="Estimated difference (NPX)",
title = "Protein source according to Human Protein Atlas") +
scale_color_manual(values=secretome_location_tissue_spec_cols,
limits=secretome_location_dap.order))
related to main Figure 2
malaria.daps.hpa23 <- dap.res %>%
arrange(desc(abs(logFC)),desc(p.adj)) %>%
filter(p.adj<=0.01) %>%
arrange(-logFC) %>%
left_join(hpa_24.0,by=c("UniProt" = "uniprot")) %>%
ungroup()
## abundant proteins in acute malaria plasma, not immune cell specific nor predicted to be secreted
## => tissue leakage??
malaria.tissue.leakage <- malaria.daps.hpa23 %>%
filter(is.na(rna_blood_cell_specificity) |
rna_blood_cell_specificity=="Not detected in immune cells",
rna_tissue_specificity %in% c("Tissue enriched"),#,"Group enriched","Tissue enhanced"),
secretome_location =="Not secreted",
logFC >0)#.5)
secretome.location.order <- c("Secreted to blood","Intracellular and membrane","Secreted in other tissues","Secreted to extracellular matrix",
"Secreted to digestive system", "Secreted in brain", "Secreted - unknown location", "Secreted in female reproductive system",
"Secreted in male reproductive system","Not secreted")
secretome.fun.count <- malaria.daps.hpa23 %>% group_by(secretome_function) %>% count() %>% arrange(-n) %>% pull(secretome_function)
df <- malaria.daps.hpa23 %>%
filter(logFC>=0) %>%
transmute(Assay,
direction,
secretome_location = factor(secretome_location, levels= secretome.location.order),
secretome_function = factor(secretome_function, levels = secretome.fun.count),
rna_blood_cell_specificity,
rna_tissue_specificity = factor(rna_tissue_specificity, levels = c("Tissue enriched",
"Group enriched",
"Tissue enhanced",
"Low tissue specificity",
"Not detected")),
tissue_enriched = factor(case_when(rna_blood_cell_specificity=="Not detected in immune cells" & rna_tissue_specificity == "Tissue enriched" & secretome_location =="Not secreted" & direction == "up" ~"1",
.default = "0"),
levels=c("1","0"),
labels=c("1"="Tissue specific and not secreted",
"0"="Less tissue specific")
))
(dap.origin.w.tl <- df %>%
ggplot(aes(axis1 = secretome_location,
axis2 = secretome_function,
axis3 = rna_tissue_specificity,
axis4 = tissue_enriched
)) +
geom_alluvium(aes(fill = secretome_location),width = 1/12,geom = "flow", lode.guidance = "forward",) +
geom_stratum(aes(fill=secretome_location),width = 1/12) +
ggfittext::geom_fit_text(stat = "stratum", aes(label = after_stat(stratum)),min.size = 1, show.legend = F) +
scale_x_discrete(limits = c("Secretome\nlocation","Secretome\nfunction", "Tissue specificity\n(based on gene expression)","Tissue specificity\n(overall)"), expand = c(.2, .05)) +
theme_bw() +
scale_fill_manual(values= c(secretome_location_cols,"NA"="red","SPEC"="white")) +
labs(title = "Abundant proteins in blood during acute malaria",
y= "Number of proteins") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.x = element_blank(),
panel.background = element_rect(colour = "black", size=0.5, fill=NA),
panel.border = element_rect(size = 0.2, colour = "grey"),
legend.position = "none"))
(alluvial_proteinorigin <- df %>%
ggplot(aes(axis1 = secretome_location,
axis3 = rna_tissue_specificity,
axis4 = tissue_enriched
)) +
geom_alluvium(aes(fill = tissue_enriched),width = 1/12,geom = "flow", lode.guidance = "forward",) +
geom_stratum(aes(fill=secretome_location),width = 1/12) +
ggfittext::geom_fit_text(stat = "stratum", aes(label = after_stat(stratum)),min.size = 1, show.legend = F) +
scale_x_discrete(limits = c("Secretome\nlocation",#"Secretome\nfunction",
"Tissue specificity\n(based on gene expression)","Tissue specificity\n(overall)"), expand = c(.2, .05)) +
theme_bw() +
scale_fill_manual(values= c("Tissue specific and not secreted"="red","Less tissue specific"="grey90")) +
labs(title = "Potential tissue leakage proteins in blood during acute malaria",
y= "Number of proteins") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.x = element_blank(),
panel.background = element_rect(colour = "black", size=0.5, fill=NA),
panel.border = element_rect(size = 0.2, colour = "grey"),
legend.position = "none"))
malaria.tissue.leakage <- df %>% filter(tissue_enriched=="Tissue specific and not secreted") %>% pull(Assay)
### tissue expression¨
mat <- hpa.tissue %>%
filter(gene_name %in% c(malaria.tissue.leakage)) %>%
pivot_wider(names_from = tissue, values_from = n_tpm, values_fn = median) %>%
dplyr::select(-gene) %>%
column_to_rownames("gene_name")
mat1 <- mat %>%
t() %>%
scale() %>%
scales::rescale(to=c(0,1)) %>%
t()
(hm.tissue.leakage <- mat1 %>%
t() %>%
Heatmap(row_names_gp = gpar(fontsize=6),
column_names_gp = gpar(fontsize=4),
cluster_rows = T,
cluster_columns = T,
name="scaled\nnTPM",
column_title = "High abundant plasma proteins\n 'Tissue specific and not secreted'",
column_title_gp = gpar(fontsize=6),
col = circlize::colorRamp2(c(min(mat1),max(mat1)), c("white","red")),
column_dend_height = unit(5,"mm"),
row_dend_width = unit(5,"mm"),
heatmap_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
legend_height = unit(20, "mm")))
)
(tissue.leakage.violine <- data.long %>%
inner_join(sampleTable_simple %>% dplyr::select(DAid,Time,sample_id,study_id),
by="sample_id") %>%
dplyr::filter(Assay %in% c(malaria.tissue.leakage),
Time!="D10") %>%
mutate(Assay = factor(Assay, levels = c(malaria.tissue.leakage))) %>%
ggplot(aes(x=Time, y=NPX, color=Time,fill=Time)) +
geom_line(aes(group=study_id), color="grey",alpha=.6,size=.2)+
geom_violin(trim = F,alpha=.2,lwd=.25) +
geom_boxplot(alpha=1,width=0.25,color="black",outlier.size = 0.5, fatten = 1,lwd=.25,show.legend = F) +
facet_wrap(~Assay,ncol = 9,scales = "free_y") +
theme_minimal() +
labs(x="") +
theme(axis.text.x = element_blank(),
legend.position = "bottom") +
scale_color_manual(values=time3_col) +
scale_fill_manual(values=time3_col))
df <- data.long %>%
inner_join(sampleTable_simple %>% dplyr::select(DAid,Time,sample_id,study_id),
by="sample_id") %>%
filter(Assay%in% c("AGXT","HAO1"),
Time=="Acute") %>%
left_join(
clinchem_study_pats_acute.wide %>% transmute(study_id, p_asat, p_alat)
) %>%
pivot_longer(names_to = "clinchem", values_to = "clinchem_val",cols = p_asat:p_alat)
df %>%
ggplot(aes(x=NPX,y=clinchem_val)) +
#geom_point(shape=16, size=.5) +
geom_point() +
geom_smooth(method="lm") +
facet_grid(Assay~clinchem)
library(see)
(agxt_hao1_alat_asat_cor <- df %>%
transmute(sample_id, Assay,NPX,clinchem,clinchem_val) %>%
pivot_wider(names_from = Assay, values_from = NPX) %>%
pivot_wider(names_from = clinchem, values_from = clinchem_val) %>%
column_to_rownames("sample_id") %>%
correlation(method = "spearman") %>%
summary() %>%
plot() +
theme(text = element_text(size=12)))
related to main Figure 2 ### Figure S5A-D
i="Secreted to blood"
acute_malaria_hpa_function_facet.list <- list()
for(i in c("Secreted to blood","Intracellular and membrane","Secreted in other tissues","Secreted to extracellular matrix","Secreted to digestive system","Secreted in brain","Secreted - unknown location")){
(acute_malaria_hpa_function_facet.list[[i]] <-
dap.res %>%
dplyr::filter(FDR==TRUE,
abs(logFC)>0) %>%
inner_join(hpa_24.0,by=c("Assay"="gene")) %>%
dplyr::filter(secretome_location == i,
!secretome_function %in% c(NA,"NULL","Not secreted")
) %>%
ungroup() %>%
mutate(secretome_function = factor(secretome_function))%>%
ggplot(aes(x = fct_reorder2(Assay, secretome_function, -logFC),
y=logFC, color = secretome_location)) +
geom_point(size=1, show.legend = F) +
geom_errorbar(aes(ymin= logFC - 1.96*SE,# 1.96*SE =conf.low
ymax=logFC + 1.96*SE,#conf.high,
color=secretome_location),
size=.25,
width=.2,
position=position_dodge(.9),
alpha=.5) +
geom_hline(yintercept = 0, linetype=2, alpha=.4) +
scale_color_manual(values = secretome_location_cols) +
coord_flip() +
theme_minimal() +
facet_grid(cols = vars(secretome_location),
rows = vars(secretome_function), scales = "free", space = "free_y",drop = F) +
theme(strip.text.y = element_text(angle = 0,size=3.5),
strip.placement = "inside",
axis.text = element_text(size = 3),
axis.title = element_text(size=5),
legend.title = element_text(size=5),
legend.text = element_text(size=5),
plot.caption = element_text(size=5),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_line(linewidth = .5),
panel.grid.minor.x = element_line(linewidth = .5),
plot.title.position = "plot",
legend.position = "none") +
labs(x="",
y="Estimated difference (NPX) with 95% CI") +
expand_limits(y = c(-1,1))
)
}
df_4_fc <- data.wide %>%
inner_join(sampleTable_simple %>% dplyr::select(DAid,study_id, sample_id, Time),by="sample_id") %>%
dplyr::filter(Time!="D10") %>%
dplyr::select(DAid,study_id, sample_id, Time, everything()) %>%
pivot_longer(cols = 5:ncol(.), names_to = "Assay", values_to = "NPX") %>%
dplyr::select(-DAid,-sample_id) %>%
pivot_wider(values_from = "NPX", names_from = "Time")
M12_median_M12 <- df_4_fc %>%
group_by(Assay) %>%
summarise(m12_median = median(M12,na.rm = TRUE))
fc_over_median_M12 <- df_4_fc %>%
inner_join(M12_median_M12, by="Assay") %>%
group_by(Assay) %>%
mutate(log2FC_medianM12 = Acute-m12_median) %>%
dplyr::select(-M12) %>%
na.omit() %>%
dplyr::rename(dNPX = log2FC_medianM12)
#fc_over_median_M12 %>% saveRDS("../data/data_clean/20230426_Explore1536_fc_over_median_m12_tidy_long.rds")
fc_over_median_M12 %>% head()
## # A tibble: 6 × 5
## # Groups: Assay [6]
## study_id Assay Acute m12_median dNPX
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 2011PT01 NPPB 1.48 0.0487 1.43
## 2 2011PT01 HNRNPK -0.193 0.658 -0.851
## 3 2011PT01 CEBPB 0.702 0.140 0.562
## 4 2011PT01 CRHR1 -0.829 0.0427 -0.872
## 5 2011PT01 TSLP 0.952 0.154 0.798
## 6 2011PT01 MFAP3 0.871 0.0906 0.781
Single-cell transcriptomics of PBMCs during acute malaria
df <- data.wide %>%
inner_join(sampleTable_simple %>%
transmute(sample_id),
by="sample_id") %>%
column_to_rownames("sample_id")
## PC calculation
pcaRes <- stats::prcomp(df,center = TRUE, scale. = TRUE)
varExp <- round(pcaRes$sdev^2 / sum(pcaRes$sdev^2) * 100)
pcaDF <- data.frame(PC1 = pcaRes$x[, 1],
PC2 = pcaRes$x[, 2]) %>%
rownames_to_column("sample_id")
## Prep for plotting
data4plot <- pcaDF %>%
dplyr::inner_join(sampleTable_simple, by="sample_id") %>%
mutate(rhapsody_lib = ifelse(study_id == "2013004","Library 1",
ifelse(study_id == "2013007","Library 2",
ifelse(study_id == "2013008","Library 3",
ifelse(study_id == "2018002","Library 4",NA)))))
(plot.pca.rhapsody <- data4plot %>%
ggplot(mapping = aes(x = PC1, y = PC2, color = Time,fill=NULL, label = NULL)) +
geom_point(alpha = 0.9, size = 1) +
ggrepel::geom_text_repel(data= . %>% filter(study_id %in% rhapsody_study_ids),
aes(x=PC1,y=PC2, label=rhapsody_lib),color="grey10",
direction = "both",box.padding = 1, max.overlaps = Inf,
size=3, alpha=.9,show.legend = F) +
geom_point(data= . %>% filter(study_id %in% rhapsody_study_ids),
aes(color=Time),
size=0.5, alpha=.8) +
guides(colour = guide_legend(override.aes = list(size=1,alpha=1)))+
ggplot2::scale_color_manual(values= time3_col) +
labs(x = paste0("PC1 (", varExp[1], " %)"),
y = paste0("PC2 (", varExp[2], " %)"),
shape="Rhapsody library") +
theme_minimal() +
theme(legend.title = element_text(size = 6),
legend.text = element_text(size = 6)))
load seurat object & set colors
library(Seurat)
#pbmc <- readRDS("../data/data/rhapsody/2021-12-17AbSeq_Cell_Calling_qc_cca_wnn_clustering_annotated.rds")
pbmc <- readRDS("../data/data/rhapsody/MalariaTraveler_RhapsodyAbSeq_Cell_Calling_qc_cca_wnn_clustering_annotated.rds")
pbmc$Group_rev <- factor(as.factor(pbmc$Group), levels = c("primary", "previously"))
#- RNA Normalization
pbmc <- NormalizeData(object = pbmc, assay = 'RNA', normalization.method = 'LogNormalize', scale.factor = 10000)
#- Ab Normalization
pbmc <- NormalizeData(object = pbmc, assay = 'ADT', normalization.method = 'CLR') #margin If performing CLR normalization, normalize across features (1) or cells (2)
## list of proteins/mrna targets covered
ab.markers <- rownames(pbmc@assays$ADT)
rna.markers <- rownames(pbmc@assays$RNA)
##change group color
ENDEMIC_colors <- setNames(c("#F1A340","#998EC3"), c("previously_exposed","primary_infected"))
#previously_exposed primary_infected
# "#F1A340" "#998EC3"
ENDEMIC_colors <- setNames(brewer.pal(3,"PuOr")[c(1,3)], c("previously_exposed","primary_infected"))
names(ENDEMIC_colors) <- c("previously","primary")
TIME_colors <- setNames(brewer.pal(6,"PiYG"), c("Acute","D10","M1","M3","M6","Y1"))
scaled_01_col <- circlize::colorRamp2(c(0,1), c("white","red"))
L1_colors <- length(unique(pbmc@meta.data$CellType_L1))
L1_colors <- c("#68a748",
"#8761cc",
"#ae953e",
"#688bcc",
"#cc693d",
"#4aac8d",
"#c361aa",
"#ca5369")
names(L1_colors) <- unique(pbmc@meta.data$CellType_L1)
Idents(pbmc) <- "CellType_L2"
L2_colors <- length(unique(pbmc@meta.data$CellType_L2))
L2_colors <- c("mDC"="#79658C",
"pDC" = "#AEA14E",
"CD14 monocytes"= "#D1EAB7",
"CD16 monocytes"="#DB7D47",
"Vd2+ gdT"="#796CD7",
"Vd2- gdT" = "#66AC55",
"NK CD56dim CD16+" = "#EBB69E",
"NK CD56dim" = "#CEE486",
"NK CD56bright"= "#E0DADB",
"NK prolif." ="#B5E7DF",
"B naive" = "#889AE5",
"B memory" = "#66ED58",
"Plasma cells" = "#893CEA",
"CD4 naive"= "#E15081",
"CD4 Treg CD80+"= "#579189",
"CD4 Treg CD80-"= "#66DEE2",
"CD4 Tfh"= "#D64EDB",
"CD4 effect. activated" = "#D38D96",
"CD4 effect. memory" = "#EDD591",
"CD4 trans. memory" = "#DAB8E3",
"CD4 central memory" = "#6FE8BE",
"CD8 naive"= "#CAEB48",
"CD8 trans. memory"= "#85EB8F",
"CD8 Tfh"="#E6D253",
"NKT"="#7BBCDF",
"CD8 effect. memory" = "#A7AE90",
"undefined"= "#D984D1")
names(L2_colors) <- unique(pbmc@meta.data$CellType_L2)
arr <- list(x = -13, y = -13, x_len = 5, y_len = 5)
umap_axis <- annotate("segment", linewidth=0.1,
x = arr$x, xend = arr$x + c(arr$x_len, 0),
y = arr$y, yend = arr$y + c(0, arr$y_len),
arrow = arrow(type = "closed", length = unit(3, 'pt')))
umap_axis_xlab <- annotate("text", x = arr$x+2.5, y = arr$x-1, label = "wnnUMAP 1",size=1)
umap_axis_ylab <- annotate("text", y = arr$y+2.5, x = arr$y-1, label = "wnnUMAP 2",size=1,angle=90)
rhapsody_umap_coords <- data.table::data.table(pbmc@meta.data, Embeddings(object = pbmc, reduction = 'wnn.umap')) %>% rownames_to_column("CellID")
lable_df <- rhapsody_umap_coords %>%
dplyr::group_by(CellType_L1) %>%
dplyr::select(CellType_L1, contains("UMAP")) %>%
summarise_all(mean)
(rhapsody_umap_ggplot_l1 <- rhapsody_umap_coords %>%
ggplot(aes(x = wnnUMAP_1, y = wnnUMAP_2)) +
geom_point(aes(color = as.character(CellType_L1)), size = 0.1, alpha=.5, show.legend = F,shape = 16) +
ggrepel::geom_text_repel(data=lable_df,aes(x=wnnUMAP_1,y=wnnUMAP_2, label=CellType_L1),size=1.5) +
coord_fixed()+
scale_color_manual(values=L1_colors) +
theme_void() +
umap_axis +
umap_axis_xlab +
umap_axis_ylab)
lable_df <- rhapsody_umap_coords %>%
dplyr::group_by(CellType_L2) %>%
dplyr::select(CellType_L2, contains("UMAP")) %>%
summarise_all(mean)
(rhapsody_umap_ggplot_l2 <- rhapsody_umap_coords %>%
ggplot(aes(x = wnnUMAP_1, y = wnnUMAP_2)) +
geom_point(aes(color = as.character(CellType_L2)), size = 0.1, alpha=.5, show.legend = F, shape = 16) +
ggrepel::geom_text_repel(data=lable_df,aes(x=wnnUMAP_1,y=wnnUMAP_2, label=CellType_L2),size=1.5) +
labs(x = 'wnnUMAP 1', y = 'wnnUMAP 2', color=NULL) +
coord_fixed()+
scale_color_manual(values=L2_colors) +
theme_void() +
umap_axis +
umap_axis_xlab +
umap_axis_ylab)
require(scales)
(per_sample_perc_l1 <- tibble(pbmc@meta.data) %>%
mutate(orig.ident = paste0("Patient"," 0",Library)) %>%
group_by(Time,orig.ident) %>%
count(CellType_L1) %>%
# Stacked + percent
ggplot(aes(fill = CellType_L1, y=n, x=orig.ident)) +
geom_bar(position="fill", stat="identity",width = 0.9) +
scale_fill_manual(values = L1_colors) +
facet_grid(~Time,scales = "free_x") +
scale_y_continuous(labels = scales::percent,expand = c(0,0)) +
labs(x = "",
y = "Frequency",
fill="") +
theme_minimal(base_size = 6) +
#theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
panel.grid.major = element_blank()))
pbmc_acute <- subset(pbmc, subset=Time=="Acute")
#- RNA Normalization
pbmc_acute <- NormalizeData(object = pbmc_acute, assay = 'RNA', normalization.method = 'LogNormalize', scale.factor = 10000) %>% ScaleData()
#- Ab Normalization
pbmc_acute <- NormalizeData(object = pbmc_acute, assay = 'ADT', normalization.method = 'CLR') #margin If performing CLR normalization, normalize across features (1) or cells (2)
## Pseudobulk (Celltype)
#https://github.com/satijalab/seurat/discussions/4210
## AverageExpression
Idents(pbmc_acute) <- "CellType_L2"
## calculation of pseudobulk, for each identity based on count data
pbmc_acute.avg.wide <- log1p(Seurat::AverageExpression(pbmc_acute, group.by = "CellType_L2", slot = "counts", verbose = FALSE)$RNA) %>%
data.frame() %>%
rownames_to_column("gene")
colnames(pbmc_acute.avg.wide) <- c("gene",colnames(Seurat::AverageExpression(pbmc_acute, group.by = "CellType_L2", slot = "counts", verbose = FALSE)$RNA))
pbmc_acute.avg.long <- pbmc_acute.avg.wide %>% pivot_longer(names_to = "celltype", values_to = "avgExp",cols = -gene)
Mapping (gene - protein)
## mapping
full_mapping <- mapping_uniprot_ensembl %>% left_join(hpa_24.0, by=c("Ensembl"="ensembl"))
wilcoxUp <- dap.res %>% filter(FDR==TRUE,logFC>1) %>% pull(UniProt)
gene.selection <- full_mapping %>%
filter(UniProt %in% wilcoxUp) %>%
filter(Symbol %in% rna.markers) %>% distinct(Assay) %>% pull(Assay)
Heatmap genes expression
rhapsody.gene.match <- full_mapping %>%
filter(UniProt %in% wilcoxUp) %>%
filter(Symbol %in% unique(pbmc_acute.avg.wide$gene))
## make matrix for heatmap
mat_pbmc_acute <- pbmc_acute.avg.wide %>%
filter(gene %in% rhapsody.gene.match$Symbol) %>%
column_to_rownames("gene") %>%
as.matrix()
## make row(gene/protein) annotation df
rowAnno.df <- data.frame(Assay = rownames(mat_pbmc_acute)) %>%
left_join(dap.res,by=c("Assay")) %>%
#left_join(hpa_24.0,by=c("Assay"="gene")) %>%
left_join(hpa_24.0,by=c("UniProt"="uniprot")) %>%
mutate(secretome_function = ifelse(is.na(secretome_function),"No annotated function", secretome_function))
col.anno.df <- data.frame(colnames = colnames(mat_pbmc_acute)) %>%
transmute(colnames,
colanno = case_when(grepl("DC",colnames) ~ "DC",
grepl("monocytes",colnames) ~ "Monocytes",
grepl("CD4",colnames) ~ "CD4+ T",
grepl("CD8",colnames) ~ "CD8+ T",
grepl("B|Plasma",colnames) ~ "B",
grepl("NK",colnames) ~ "NK",
grepl("gdT",colnames) ~ "gdT",
.default = "undefined"),
colanno = factor(colanno, levels= c("DC","Monocytes","NK","gdT","B","CD4+ T","CD8+ T","undefined")))
right.anno <- HeatmapAnnotation(df = col.anno.df %>% column_to_rownames("colnames"),
which = "row",
col = list(colanno = L1_colors),
# name = "SNF cluster",
show_annotation_name = F,
show_legend = F,
annotation_name_gp = gpar(fontsize=6),
annotation_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
direction = "horizontal",
legend_height = unit(.1, "cm"),
grid_width = unit(.2, "cm")),
simple_anno_size = unit(1, "mm"))
top.anno <- HeatmapAnnotation(df = rowAnno.df %>% transmute(Assay, secretome_location) %>% column_to_rownames("Assay"),
which = "column",
show_legend = c(TRUE),
show_annotation_name = F,
annotation_name_gp = gpar(fontsize = 6),
annotation_legend_param = list(title = "HPA\nclassification",
title_gp = gpar(fontsize = 6),
labels_gp = gpar(fontsize = 6),
legend_height = unit(3, "mm"),
grid_width = unit(3, "mm")),
col = list(secretome_location = c(secretome_location_cols)),
simple_anno_size = unit(3, "mm"),
na_col = "grey90")
## getting foldchange (dNPX over median convalescence)
m <- fc_over_median_M12 %>%
filter(Assay %in% rowAnno.df$Assay) %>%
pivot_wider(names_from = Assay, values_from = dNPX,id_cols = study_id) %>%
column_to_rownames("study_id") %>%
as.matrix() %>% t()
m <- m[rownames(mat_pbmc_acute),]
bottom.anno <- HeatmapAnnotation("Plasma protein\ndNPX" = anno_boxplot(t(m),
height = unit(1.5, "cm"),width = unit(1.5,"cm"),
box_width = 0.8,
axis_param = list(side = "right",
labels_rot = 45,
gp=gpar(fontsize = 5)),
gp = gpar(fill="#C51B7D"),
outline = FALSE),
annotation_name_rot = 0,
annotation_name_gp = gpar(fontsize = 5),
annotation_name_side = "right",
simple_anno_size = unit(3, "mm"),
which = "column")
(pbmc_l2_acute_hm.wide <- mat_pbmc_acute[rownames(m),] %>%
t() %>%
## scale values from 0-1
as.data.frame() %>% mutate(across(where(is.numeric), ~ scales::rescale(., to=c(0,1)))) %>%
as.matrix() %>%
ComplexHeatmap::Heatmap(
name="average\ngene\nexpression",
col = scaled_01_col,
top_annotation = top.anno,
bottom_annotation = bottom.anno,
right_annotation = right.anno,
column_dend_height = unit(2, "mm"),
cluster_rows = F,
row_dend_reorder = TRUE,
show_row_names = TRUE,
column_split = rowAnno.df$secretome_function,
column_dend_reorder = F,
row_title_side = "right",
row_title_gp = gpar(fontsize = 6),
cluster_columns = T,
row_split = col.anno.df$colanno,
row_title = NULL,
column_title_gp = gpar(fontsize=4),
column_title_rot = 45,
row_title_rot = 0,
row_names_gp = gpar(fontsize = 4),
#row_dend_width = unit(2, "mm"),
row_dend_side = "left",
cluster_row_slices = T,
column_names_gp = gpar(fontsize = 4),
column_names_rot = 90,
heatmap_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
grid_width = unit(3, "mm")),
)
)
CellPhoneDB
#cpdb.protein_input <- read_delim("../data/cellphoneDB/v4.1.0_protein_input.csv")
#cpdb.interaction_input <- read_delim("../data/cellphoneDB/v4.1.0_interaction_input.csv")
cpdb.protein_input <- read_delim("../data/cellphoneDB/v5_protein_input.csv",)
cpdb.interaction_input <- read_delim("../data/cellphoneDB/v5_interaction_input.csv")
kegg.ccr <- read_excel("../data/KEGG_CytokineCytokineReceptorInteraction_malariaspec.xlsx") %>% mutate(Source = "KEGG")
rna.markers.uniprot <- data.frame(gene = rna.markers) %>%
left_join(hpa_24.0 %>% transmute(gene, uniprot)) %>% na.omit()
ligand.q <- dap.res %>% filter(p.adj <=0.01, logFC > .1) %>%
left_join(cpdb.protein_input,
by=c("UniProt"="uniprot")) %>%
pull(UniProt)
length(ligand.q)
## [1] 502
nw <- cpdb.interaction_input %>%
filter(partner_a %in% ligand.q,
directionality == "Ligand-Receptor") %>%
mutate(protein_name_b_strip = gsub("_HUMAN","",protein_name_b),
protein_name_a = gsub("_HUMAN","",protein_name_a)) %>%
mutate(protein_name_b_complex = case_when(is.na(protein_name_b) ~ str_remove(interactors,paste0(protein_name_a,"-")),
.default = protein_name_b)) %>%
separate_longer_delim(protein_name_b_complex, delim = "+") %>%
left_join(hpa_24.0 %>% transmute(protein_name_b_complex = gene,
uniprot_b_complex = uniprot), by=c("protein_name_b_complex")) %>%
mutate(protein_name_b = case_when(is.na(protein_name_b) ~ protein_name_b_complex,
.default = protein_name_b),
partner_b_new = case_when(is.na(uniprot_b_complex) ~ partner_b,
.default = uniprot_b_complex)) %>%
transmute(partner_a, partner_b, partner_b_new) %>%
filter(partner_b_new %in% rna.markers.uniprot$uniprot) %>%
mutate(uniprot_a = partner_a,
uniprot_b = partner_b_new)
measured.in.plasma <- dap.res %>% filter(p.adj <=0.01, logFC > 0.1) %>% pull(UniProt)
measured.in.plasma.name <- dap.res %>% filter(p.adj <=0.01, logFC > .1) %>% pull(Assay)
G <- as_tbl_graph(nw %>% transmute(from = uniprot_a,
to = uniprot_b))
node_table <- as_tibble(G) %>%
left_join(dap.res %>% mutate(measured.as.soluble = T),
by=c("name"="UniProt")) %>%
left_join(hpa_24.0 %>% transmute(gene, uniprot),
by=c("name"="uniprot")) %>%
mutate(protein_name = gene,
measured.as.soluble = case_when(is.na(measured.as.soluble) ~F,
.default = T))# %>%
(ligand_receptor_nw <- G %>%
inner_join(node_table,by="name") %>%
create_layout(layout = "fr") %>%
ggraph() +
geom_edge_link(alpha=.02) +
geom_edge_fan(width = .5, color = "grey90") +
geom_node_point(aes(color=if_else(measured.as.soluble==T,logFC,NA),
size= if_else(measured.as.soluble==T,logFC,1))) +
guides(color = guide_colourbar(barwidth = 3, barheight = .75),
size=F) +
labs(color="logFC of proteins\nin plasma") +
scale_size(range=c(1,3.5)) +
geom_node_text(aes(label = protein_name,
color= if_else(measured.as.soluble==T,logFC,NA)),
size=1,
repel=T) +
scale_color_continuous(low="thistle2",
high="darkred",
guide="colorbar",
na.value="grey20") +
theme_void() +
theme(legend.position = "bottom",
plot.title = element_text(size=6),
legend.title = element_text( size=6),
legend.text=element_text(size=6))
)
receptor_fam <- list(cxc_subfam = c("CXCR1","CXCR2","CXCR3","CXCR4","CXCR5","CXCR6","CXCR7","XCR1","CX3CR1"),
cc_subfam = paste0("CCR",1:11),
class1helicalcyto_fam = c("IL2RA","IL4R"),
class2helicalcyto_fam = c("IL10RA","IL10RB"),
prolaction_fam = c("GHR","CSF3R"),
ifn_fam =c("IFNAR1","IFNAR2","IFNGR1","IFNGR2"),
il1likecyto_fam = c("IL1R1","IL1RAP","IL1R2","IL18R1","IL18RAP","ST2","IL1RAP"),
nonclassified = c("CD4","CSF1R"),
tnf_fam = c("TNFR1","TNFR2","HVEM","FAS","DR4","DR5","DCR1","DCR2","EDAR","RANK","CD27","CD30","CD40","Ox40","TACI"),
tgfb_fam = c("TGFBR2","ACVR2B","ACVR1B")) %>%
enframe() %>% unnest(cols = c(value)) %>% dplyr::rename(subfam = name, receptor = value)
#extract all transmembrane receptors from CellPhoneDB as uniprotIDs
## filter CellPhoneDB protein_input for transmembrane & receptors
cpdb.receptor.transmem.name <- cpdb.protein_input %>% filter(transmembrane==T |
receptor==T) %>%
mutate(protein_name = gsub("_HUMAN","",protein_name)) %>%
pull(uniprot)
df <- pbmc_acute.avg.wide %>%
right_join(rna.markers.uniprot) %>%
filter(uniprot %in% nw$uniprot_b)
geneAnno <- df %>% transmute(gene,uniprot) %>% left_join(receptor_fam,by=c("gene"="receptor")) %>%
mutate(subfam = ifelse(is.na(subfam),"Other",subfam),
CPDB = ifelse(uniprot %in% cpdb.receptor.transmem.name,T,F),
KEGG = ifelse(gene %in% receptor_fam$receptor,T,F),
in_plasma = ifelse(gene %in% measured.in.plasma.name,T,F))
mat <- df %>%
dplyr::select(-uniprot) %>%
column_to_rownames("gene") %>%
as.matrix()
col.anno.df <- data.frame(colnames = colnames(mat)) %>%
transmute(colnames,
colanno = case_when(grepl("DC",colnames) ~ "DC",
grepl("monocytes",colnames) ~ "Monocytes",
grepl("CD4",colnames) ~ "CD4+ T",
grepl("CD8",colnames) ~ "CD8+ T",
grepl("B|Plasma",colnames) ~ "B",
grepl("NK",colnames) ~ "NK",
grepl("gdT",colnames) ~ "gdT",
.default = "undefined"),
colanno = factor(colanno, levels= c("DC","Monocytes","NK","gdT","B","CD4+ T","CD8+ T","undefined")))
colAnn.top <- HeatmapAnnotation(df = col.anno.df %>% column_to_rownames("colnames"),
which = "col",
col = list(colanno = L1_colors),
show_annotation_name = F,
show_legend = F,
annotation_name_gp = gpar(fontsize=6),
annotation_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
direction = "horizontal",
legend_height = unit(.1, "cm"),
grid_width = unit(.2, "cm")),
simple_anno_size = unit(1, "mm")
)
m <- mat %>%
t() %>%
as.data.frame() %>%
mutate(across(where(is.numeric), ~ scales::rescale(., to=c(0,1)))) %>%
as.matrix()
(pbmc_l2_acute_cellphonedb_hm.wide <- m %>%
ComplexHeatmap::Heatmap(name="average\ngene\nexpression",
column_split = geneAnno$subfam,
cluster_columns = T,
cluster_column_slices = T,
bottom_annotation = HeatmapAnnotation(df=geneAnno %>% transmute(in_plasma),
which="column",
annotation_legend_param = list(title_gp = gpar(fontsize = 6),
labels_gp = gpar(fontsize = 6)),
annotation_name_gp = gpar(fontsize = 5),
simple_anno_size = unit(1.5, "mm"),na_col = c("white","white","white"),
show_legend = c(FALSE,FALSE,FALSE),
gp = gpar(col = "grey90"),
col=list(CPDB = c("TRUE" = "grey60", "FALSE" = "white","NA" = "white"),
KEGG = c("TRUE" = "grey60", "FALSE" = "white","NA" = "white"),
in_plasma = c("TRUE" = "darkred", "FALSE" = "white","NA" = "white"))),
show_row_dend = F,
row_title_side = "right",
row_title_rot = 0,
row_title_gp = gpar(fontsize = 5),
row_title = NULL,
col = scaled_01_col,
row_names_gp = gpar(fontsize = 4),
show_column_dend = T,
column_dend_height = unit(2,"mm"),
row_dend_width = unit(2, "mm"),
row_dend_side = "left",
clustering_method_columns = "mcquitty",
right_annotation = rowAnnotation(df = col.anno.df %>% column_to_rownames("colnames"),
col = list(colanno = L1_colors),
show_annotation_name = F,
show_legend = F,
annotation_name_gp = gpar(fontsize=6),
annotation_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
direction = "horizontal",
legend_height = unit(.1, "cm"),
grid_width = unit(.2, "cm")),
simple_anno_size = unit(1, "mm")
),
cluster_rows = F,
row_split = col.anno.df$colanno,
column_title_gp = gpar(fontsize=4),
column_title_rot = 45,
column_names_gp = gpar(fontsize = 4),
heatmap_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
grid_width = unit(3, "mm")))
)
related to main Figure 3
(rhapsody_cells_per_sample <- tibble(pbmc@meta.data) %>%
#as.data.table %>% # the resulting md object has one "row" per cell
rownames_to_column("CellID") %>%
group_by(orig.ident,Time) %>%
dplyr::count() %>%
ggplot(aes(x=orig.ident,y=n, fill=Time)) +
scale_fill_manual(values = TIME_colors,breaks = c("Acute","D10","Y1")) +
scale_y_continuous(expand = c(0,0), trans = "sqrt") +
coord_flip()+
geom_bar(stat="identity", position="dodge", show.legend = TRUE) +
geom_hline(yintercept=6000,lwd=.2) +
labs(y="Number of cells",
x="",
fill="") +
theme_minimal())
## integration
(rhapsody_umap_ggplot_int_orig.ident <- rhapsody_umap_coords %>%
ggplot(aes(x = wnnUMAP_1, y = wnnUMAP_2)) +
geom_point(aes(color = as.character(orig.ident)), size = 0.1, alpha=.1) +
labs(x = 'wnnUMAP 1', y = 'wnnUMAP 2', color=NULL) +
guides(color = guide_legend(override.aes = list(alpha = 1,size=.25))) +
my_dimred_theme
)
## integration
(rhapsody_umap_ggplot_int_time <- rhapsody_umap_coords %>%
ggplot(aes(x = wnnUMAP_1, y = wnnUMAP_2)) +
geom_point(aes(color = as.character(Time)), size = 0.25, alpha=.1) +
labs(x = 'wnnUMAP 1', y = 'wnnUMAP 2', color=NULL) +
scale_color_manual(values = TIME_colors,breaks = c("Acute","D10","Y1")) +
guides(color = guide_legend(override.aes = list(alpha = 1,size=.25))) +
my_dimred_theme
)
(vln_adt.weight <- VlnPlot(pbmc, features = "adt.CCA.weight", group.by = 'CellType_L2', cols = L2_colors, sort = TRUE, pt.size = 0) +
NoLegend() +
labs(title = "adt weight", y="ADT weight",x=NULL))
rna.marker4dotplot <- c("S100A12","CD14","S100A9","VMO1","C1QA","FCGR3A", "KLRF1","KIR2DL1",
"GNLY","IL12RB2","IL18R1","GZMK","TYMS","TOP2A", "KIAA0101","CCR7","LEF1",
"MYC", "PASK","CD28","ICOS","RGS1","CTLA4","CCR5","LAG3","POU2AF1",
"FOXP3","IL2RA","CD8A","ZNF683","RORC","IKZF2","MS4A1","CD79A","IGKC",
"TCL1A","FCER2","CD200","TNFRSF17","FCER1A","CLEC10A","CD1C","NRP1","TLR9","TLR7")
(dotplot.rna <- DotPlot(pbmc,
features = rna.marker4dotplot,
assay = "RNA",
cols = c("RdYlBu"),
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 1,
idents = NULL,
group.by = NULL,
split.by = NULL,
cluster.idents = F,
scale = TRUE,
scale.by = "radius",
scale.min = NA,
scale.max = NA) +
RotatedAxis() +
theme(axis.text.x=element_text(size=6),
axis.text.y=element_text(size=6),
text = element_text(size=6)) +
labs(x="",y="", title="mRNA expression")
)
DotPlot(pbmc,
features = rownames(pbmc@assays$ADT),
assay = "ADT",
cols = c("RdYlBu"),
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 1,
idents = NULL,
group.by = NULL,
split.by = NULL,
cluster.idents = F,
scale = TRUE,
scale.by = "radius",
scale.min = NA,
scale.max = NA) +
theme(axis.text.x=element_text(size=6), # cell subsets
axis.text.y=element_text(size=6),
text = element_text(size=6)) +
RotatedAxis() +
labs(x="",y="", title="Surface protein expression")
(cellnumbers_l2 <- tibble(pbmc_acute@meta.data) %>%
group_by(CellType_L2) %>%
count() %>%
ggplot(aes(fill = CellType_L2, y=n, x=fct_reorder(CellType_L2,n))) +
geom_col(show.legend = F) +
scale_fill_manual(values = L2_colors) +
scale_y_continuous(expand = c(0, 0),trans = "log10", breaks=c(1,10,100,1000,5000)) +
coord_flip() +
labs(x=NULL,
y="Number of cells") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.border = element_blank()))
genes.oi.timepoints = VlnPlot(pbmc,
features = c("CD163", "IL1B", "IL1RN", "ICAM1", "LILRB4", "CXCL10", "S100A12", "NAMPT",
"CCL2", "CXCL11", "CXCL9", "AZU1","VMO1", "TNFRSF8", "CHI3L1","CCL4", "GZMA",
"GZMB","GZMH","CST7","TNFRSF9","IL2RA","IL1RL1","CD48", "CD27", "CD38", "HAVCR2"),
group.by = 'CellType_L2', assay = "RNA",
split.by = "Time", cols = time3_col, sort = F, pt.size = 0,stack = T,flip = F) +
theme(axis.text.x = element_text(angle = 0, size=6),
axis.text.y = element_text(size=6),
strip.text.x = element_text(angle = 90, size = 5, face=NULL,hjust = .5),
axis.title.y = element_blank(),
axis.title.x = element_text(size=6),
# strip.text.y = element_text(size = 6),
# strip.text.x = element_text(size=6),
legend.position = "right",
legend.key.size = unit(.2, 'cm'), #change legend key size
legend.key.height = unit(.2, 'cm'), #change legend key height
legend.key.width = unit(.2, 'cm'), #change legend key width
legend.title = element_text(size=5), #change legend title font size
legend.text = element_text(size=5))
genes.oi.timepoints$layers[[1]]$aes_params$size = .1
genes.oi.timepoints
related to main Figure 3
dooley <- readRDS("../data/data/ReAnalysis_DooleyNL_etal_bioRxiv_2022/annotated_Sabah_data_21Oct2022.rds")
dooley_colors <- setNames(randomcoloR::distinctColorPalette(length(unique(dooley@meta.data$celltype))),
unique(dooley@meta.data$celltype))
## Preliminary QC check
tibble(dooley@meta.data) %>% # the resulting md object has one "row" per cell
rownames_to_column("CellID") %>%
group_by(orig.ident,timepoint,sample.day,ID) %>%
dplyr::count() %>%
ungroup() %>%
ggplot(aes(x=fct_reorder(orig.ident, timepoint),y=n, fill=ID)) +
scale_y_continuous(expand = c(0, 0)) +
coord_flip()+
geom_bar(stat="identity", position="dodge", show.legend = F) +
labs(title = "Number of cells per sample",
x=NULL) +
theme_minimal()
dooley_umap_coords <- data.table::data.table(dooley@meta.data, Embeddings(object = dooley, reduction = 'umap')) %>% rownames_to_column("CellID")
lable_df <- dooley_umap_coords %>%
dplyr::group_by(celltype) %>%
dplyr::select(celltype, contains("UMAP")) %>%
summarise_all(mean)
(dooley_umap_ggplot <- dooley_umap_coords %>%
ggplot(aes(x = UMAP_1, y = UMAP_2)) +
geom_point(aes(color = as.character(celltype)), size = 0.1, alpha=.5,show.legend = F) +
ggrepel::geom_text_repel(data=lable_df,aes(x=UMAP_1,y=UMAP_2, label=celltype),size=2) +
labs(x = 'UMAP 1', y = 'UMAP 2') +
scale_color_manual(values=dooley_colors) +
my_dimred_theme)
## Percentage celltype in sample
(dooley_per_sample_perc <- tibble(dooley@meta.data) %>%
group_by(timepoint,orig.ident) %>%
count(celltype) %>%
# Stacked + percent
ggplot(aes(fill = celltype, y=n, x=orig.ident)) +
geom_bar(position="fill", stat="identity",width = 0.9) +
facet_grid(~timepoint,scales = "free_x",space = "free_x") +
scale_y_continuous(labels = scales::percent) +
scale_fill_manual(values=dooley_colors) +
scale_y_continuous(labels = scales::percent,expand = c(0,0)) +
labs(x = "",
y = "Frequency",
fill="") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.text = element_text(size=6),
legend.position = "top",
panel.grid.major = element_blank()))
get only acute malaria cells (Day0)
dooley_0 <- subset(dooley, subset=timepoint=="Day0")
#- RNA Normalization
dooley_0 <- NormalizeData(object = dooley_0, assay = 'RNA', normalization.method = 'LogNormalize', scale.factor = 10000)
(dooley_0_cellnumbers <-
tibble(dooley_0@meta.data) %>%
group_by(celltype) %>%
count() %>%
ggplot(aes(fill = celltype, y=n, x=fct_reorder(celltype,n))) +
geom_col(show.legend = F) +
scale_y_continuous(expand = c(0, 0),trans = "log10") +
coord_flip() +
scale_fill_manual(values=dooley_colors) +
labs(x=NULL,
y="Number of cells") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.border = element_blank()))
## calculation of pseudobulk, for each identity based on count data
dooley_0.avg.wide <- log1p(AverageExpression(dooley_0, group.by = "celltype", slot = "counts", verbose = FALSE)$RNA) %>%
as.data.frame() %>%
rownames_to_column("gene")
dooley_0.avg.long <- dooley_0.avg.wide %>%
pivot_longer(names_to = "celltype", values_to = "avgExp",cols = -gene) %>% filter(avgExp >0)
dooley.gene.match <- full_mapping %>%
filter(UniProt %in% wilcoxUp) %>%
filter(Symbol %in% unique(dooley_0.avg.long$gene))
mat_dooley_0 <- dooley_0.avg.wide %>%
filter(gene %in% dooley.gene.match$Symbol) %>%
column_to_rownames("gene") %>%
as.matrix()
## Rhapsody vs. Dooley
##binning of cell immune cell subsets
#dim(mat_dooley_0)
#dim(mat_pbmc_acute)
## gene overlap Rhapsody, Dooley et al, Explore
rhapsody_dooley_overlapp <- intersect(rownames(mat_dooley_0),rownames(mat_pbmc_acute))
compare_dooley_rhapsody <- data.frame(mat_dooley_0[rhapsody_dooley_overlapp,]) %>%
rownames_to_column("gene") %>%
pivot_longer(cols = -gene) %>%
mutate(origin = "dooley") %>%
bind_rows(
data.frame(mat_pbmc_acute[rhapsody_dooley_overlapp,]) %>%
rownames_to_column("gene") %>%
pivot_longer(cols = -gene) %>%
mutate(origin = "rhapsody")) %>%
mutate(name.common = ifelse(grepl("CD14",name),"CD14mono",
ifelse(grepl("CD16.monocytes",name,ignore.case = T),"CD16mono",
ifelse(grepl("pDC",name),"pDC",
ifelse(grepl("mDC",name),"mDC",
ifelse(grepl("NKT",name),"NKT",
ifelse(grepl("gdT|γδ.T.cells",name),"gdTcell",
ifelse(grepl("CD8",name),"CD8T",
ifelse(grepl("CD4",name),"CD4T",
ifelse(grepl("NK",name),"NK",
ifelse(grepl("B",name),"Bcell",
ifelse(grepl("Unknown|undefined",name),"undefined",
name))))))))))))
common.celltypes <- intersect(filter(compare_dooley_rhapsody,origin=="dooley") %>% pull(name.common),
filter(compare_dooley_rhapsody,origin=="rhapsody") %>% pull(name.common))
dooley_hm <- compare_dooley_rhapsody %>%
filter(origin=="dooley", name.common %in% common.celltypes) %>%
pivot_wider(names_from = gene, values_from = value,id_cols = name.common) %>%
column_to_rownames("name.common") %>%
## scale values from 0-1
as.data.frame() %>% mutate(across(where(is.numeric), ~ scales::rescale(., to=c(0,1)))) %>%
as.matrix() %>%
t() %>%
Heatmap(name="Dooley",
column_title = "Dooley et al.",
column_title_gp = gpar(fontsize=6),
column_order = common.celltypes,
col = scaled_01_col,
cluster_rows = TRUE,
row_dend_reorder = TRUE,
show_row_names = TRUE,
show_heatmap_legend = F,
row_title_gp = gpar(fontsize = 5),
row_title_rot = 0,
row_names_gp = gpar(fontsize = 4),
row_dend_width = unit(5, "mm"),
column_names_gp = gpar(fontsize = 5),
column_names_rot = 90,
heatmap_legend_param = list(labels_gp = gpar(fontsize = 5),
title_gp = gpar(fontsize = 5)),
width = nrow(.)*unit(.3, "mm"),
height = ncol(.)*unit(6, "mm"),
)
rhapsody_hm <- filter(compare_dooley_rhapsody, origin=="rhapsody", name.common %in% common.celltypes) %>%
pivot_wider(names_from = gene, values_from = value,id_cols = name.common,values_fn = median) %>%
column_to_rownames("name.common") %>%
## scale values from 0-1
as.data.frame() %>% mutate(across(where(is.numeric), ~ scales::rescale(., to=c(0,1)))) %>% #scale(.))) %>%
as.matrix() %>%
t() %>%
Heatmap(name="average\ngene\nexpression",
column_title = "This study",
column_title_gp = gpar(fontsize=6),
column_order = common.celltypes,
col = scaled_01_col,
cluster_rows = TRUE,
row_dend_reorder = TRUE,
show_row_names = TRUE,
row_title_gp = gpar(fontsize = 5),
row_title_rot = 0,
row_names_gp = gpar(fontsize = 4),
row_dend_width = unit(5, "mm"),
column_names_gp = gpar(fontsize = 5),
column_names_rot = 90,
heatmap_legend_param = list(labels_gp = gpar(fontsize = 5),
title_gp = gpar(fontsize = 5),
title_position = "topcenter"
),
width = nrow(.)*unit(.3, "mm"),
height = ncol(.)*unit(6, "mm"),
)
compare_dooley_rhapsody_hm_new <- dooley_hm + rhapsody_hm
draw(compare_dooley_rhapsody_hm_new, row_dend_side = "left", main_heatmap = "average\ngene\nexpression",auto_adjust = F)
dim(mat_dooley_0)
## [1] 213 15
row.anno.df <- data.frame(Assay = rownames(mat_dooley_0)) %>%
left_join(dap.res,by=c("Assay")) %>%
left_join(hpa_24.0,by=c("Assay"="gene")) %>%
mutate(secretome_function = ifelse(is.na(secretome_function),"Not secreted", secretome_function)) %>%
filter(secretome_function != "Not secreted")
rowAnno <- HeatmapAnnotation(df = row.anno.df %>% transmute(Assay,secretome_location) %>% column_to_rownames("Assay"),
which = "row",
show_legend = c(TRUE),
show_annotation_name = F,
annotation_name_gp = gpar(fontsize = 5),
annotation_legend_param = list(title = "HPA\nclassification",
title_gp = gpar(fontsize = 5),
labels_gp = gpar(fontsize = 5),
direction="horizontal",
legend_height = unit(1, "mm"),
grid_width = unit(3, "mm"),
title_position = "topleft"),
col = list(secretome_location = secretome_location_cols),
simple_anno_size = unit(3, "mm"),
na_col = "grey90")
(dooley_day0_hm.hpa.mapping <- mat_dooley_0[row.anno.df$Assay,] %>%
t() %>%
## scale values from 0-1
as.data.frame() %>% mutate(across(where(is.numeric), ~ scales::rescale(., to=c(0,1)))) %>%
as.matrix() %>%
t() %>%
ComplexHeatmap::Heatmap(
name="average\ngene\nexpression\n",
col = scaled_01_col,
right_annotation = rowAnno,
column_dend_height = unit(2, "mm"),
cluster_rows = TRUE,
row_dend_reorder = TRUE,
show_row_names = TRUE,
row_split = row.anno.df$secretome_function,
row_title_side = "right",
row_title_gp = gpar(fontsize = 5),
row_title_rot = 0,
row_names_gp = gpar(fontsize = 4),
row_dend_width = unit(4, "mm"),
column_names_gp = gpar(fontsize = 5),
column_names_rot = 90,
heatmap_legend_param = list(labels_gp = gpar(fontsize = 5),
title_gp = gpar(fontsize = 5)),
height = ncol(.)*unit(8, "mm"),
width = ncol(.)*unit(2,"mm"))
)
mat_dooley_0 <- dooley_0.avg.wide %>%
filter(gene %in% dooley.gene.match$Symbol) %>%
column_to_rownames("gene") %>%
as.matrix()
dim(mat_dooley_0)
## [1] 213 15
row.anno.df <- data.frame(Assay = rownames(mat_dooley_0)) %>%
left_join(dap.res,by=c("Assay")) %>%
left_join(hpa_24.0,by=c("Assay"="gene")) %>%
mutate(secretome_function = ifelse(is.na(secretome_function),"Not secreted", secretome_function)) %>%
filter(secretome_function == "Not secreted")
(dooley_day0_hm.no.hpa.mapping <- mat_dooley_0[row.anno.df$Assay,] %>%
t() %>%
## scale values from 0-1
as.data.frame() %>% mutate(across(where(is.numeric), ~ scales::rescale(., to=c(0,1)))) %>% #scale(.))) %>%
as.matrix() %>%
t() %>%
ComplexHeatmap::Heatmap(name="average\ngene\nexpression",
col = scaled_01_col,
show_heatmap_legend = F,
column_dend_height = unit(2, "mm"),
cluster_rows = TRUE,
row_dend_reorder = TRUE,
show_row_names = TRUE,
row_split = row.anno.df$secretome_function,
row_title_side = "right",
row_title_gp = gpar(fontsize = 5),
row_title_rot = 0,
row_names_gp = gpar(fontsize = 4),
row_dend_width = unit(4, "mm"),
column_names_gp = gpar(fontsize = 5),
column_names_rot = 90,
heatmap_legend_param = list(labels_gp = gpar(fontsize = 5),
title_gp = gpar(fontsize = 5)),
height = ncol(.)*unit(8, "mm"),
width = ncol(.)*unit(2,"mm"))
)
Protein profile-based patient stratification of disease severity
** related to main Figure 4**
Figure S8A
clin_marker_cols <- c("CRP","Creatinine","Parasitemia","Platelets","Bilirubin","ASAT","ALAT","Hemoglobin")
clin_marker_cols <- setNames(brewer.pal(length(clin_marker_cols),name="Set3"), clin_marker_cols)
clinical_variables_4circos <- subjectTable %>%
left_join(clinchem_study_pats_acute.wide, by="study_id") %>%
#inner_join(patient_clust,by="study_id") %>%
ungroup() %>%
pivot_longer(cols = c(plt_count_min,inf_rbc_max,crp_max,hb_min,bili_max,crea_max,"p_alat","p_asat"),
names_to = "clin.var", values_to="clin.val",
) %>%
drop_na(clin.val) %>%
group_by(clin.var) %>%
mutate(n_group= as.character(n()),
label_group= factor(paste0('\n n = ', n_group))) %>%
mutate(clin.var = case_when(clin.var=="crp_max"~"CRP",
clin.var=="p_alat"~"ALAT",
clin.var=="p_asat"~"ASAT",
clin.var=="plt_count_min"~"Platelets",
clin.var=="inf_rbc_max" ~"Parasitemia",
clin.var=="bili_max"~"Bilirubin",
clin.var=="hb_min" ~"Hemoglobin",
clin.var=="crea_max"~"Creatinine",
.default=clin.var)) %>%
mutate(clin.var = factor(clin.var, levels=names(clin_marker_cols)))
(clin_para_whole_cohort <- clinical_variables_4circos %>%
ggplot(aes(x=label_group, y=clin.val, fill=clin.var)) +
geom_violin(trim=F, show.legend = F, width=.4,lwd=.25) +
geom_jitter(size=0.05,width = .1, show.legend = F,lwd=.25) +
geom_boxplot(alpha=.7, outlier.shape = NA, width=.2, show.legend = F,lwd=.25) +
facet_wrap(~clin.var, scales = "free", nrow = 4,
labeller = labeller(clin.var = c("Bilirubin"= "Bilirubin\n(\U003BCmol/L)",
"ALAT"="ALT\n(U/L)",
"ASAT"="AST\n(U/L)",
"CRP"="CRP\n(mg/L)",
"Parasitemia"="Parasitemia\n(%)",
"Creatinine"="Creatinine\n(\U003BCmol/L)",
"Hemoglobin"="Hemoglobin\n(g/dL)",
"Platelets"="Platelet\n(counts)"))) +
theme_bw(base_size = 6)+
labs(y="Clinical parameter value",
x=NULL) +
scale_fill_manual(values=clin_marker_cols))
patient_SOFA <- subjectTable %>% dplyr::select(study_id, contains("SOFA"))
who22_severemalaria <- subjectTable %>%
transmute(study_id,
respiratory_distress = case_when(pulm_edema == 1 |
resp_distress == 1 |
ards == 1 ~ 1,
.default = 0),
circ_80,
hb_70 = case_when(hb_min <= 70 ~ 1,
hb_min >70 ~0,
.default = NA),
bili_50,
crea_265 = case_when(crea_max >= 265 ~ 1,
crea_max <265 ~ 0,
.default = NA),
parasitaemia_2 = case_when(inf_rbc_max >= 2 ~ 1,
inf_rbc_max < 2 ~ 0,
.default = NA),
parasitaemia_5 = case_when(inf_rbc_max >= 5 ~ 1,
inf_rbc_max < 5 ~ 0,
.default = NA)
)
mat <- who22_severemalaria %>%
column_to_rownames("study_id") %>%
as.matrix() %>%
t()
mat_sofa_total <- data.frame(study_id = colnames(mat)) %>%
left_join(patient_SOFA, by="study_id")
study_id_SOFA.sorted <- mat_sofa_total %>% arrange(-SOFA_total) %>% pull(study_id)
mat <- mat[,study_id_SOFA.sorted]
severesign_count <- who22_severemalaria %>%
transmute(study_id,
respiratory_distress,
circ_80,
hb_70,
bili_50,
crea_265,
parasitaemia_5) %>%
replace(is.na(.), 0) %>%
rowwise() %>%
mutate(nr_of_severe_signs = sum(c_across(where(is.numeric)))) %>%
transmute(study_id,
nr_of_severe_signs)
(hm.sofa.clin <- mat %>%
Heatmap(name = "Severe malaria symptoms\ndefined by WHO 2015",
col = c("0"="white","1"="#C51B7D"),
column_names_gp = gpar(fontsize = 6),
na_col = "grey90",
cluster_columns = F,
cluster_rows = F,
show_row_dend = F,
show_column_dend = F,
show_column_names = F,
top_annotation = HeatmapAnnotation(df = data.frame(study_id = colnames(mat)) %>%
left_join(severesign_count) %>%
column_to_rownames("study_id"),
gp = gpar(fontsize=6),
annotation_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
direction = "horizontal",
title_position = "topcenter",
title = "Nr of\nsevere malaria\nsymptoms"),
simple_anno_size = unit(2, "mm"),
annotation_name_gp = gpar(fontsize=6),
col = list(nr_of_severe_signs = circlize::colorRamp2(c(0,6), c("white","orange")))),
row_title_side = "left",
row_title_rot = 0,
row_title_gp = gpar(fontsize = 6),
column_title_side = "top",
row_names_gp = gpar(fontsize = 6),
row_dend_width = unit(0.5, "cm"),
column_title_gp = gpar(fontsize = 6),
column_names_rot = 90,
heatmap_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
title_position = "topcenter",
at = c(0,1),
labels = c("no","yes")),
bottom_annotation = HeatmapAnnotation(df = data.frame(study_id = colnames(mat)) %>%
left_join(patient_SOFA, by="study_id") %>%
dplyr::select(-study_id) %>%
as.data.frame(),
which = 'col',
gp = gpar(fontsize=6),
simple_anno_size = unit(2, "mm"),
annotation_name_gp = gpar(fontsize=6),
col = list(SOFA_total = colorRamp2(c(min(patient_SOFA$SOFA_total,na.rm = TRUE),
median(patient_SOFA$SOFA_total,na.rm = TRUE),
max(patient_SOFA$SOFA_total,na.rm = TRUE)),
c(brewer.pal(3,name="PuBu"))),
SOFA_liver = SOFA_sub_col,
SOFA_cns = SOFA_sub_col,
SOFA_coag = SOFA_sub_col,
SOFA_resp = SOFA_sub_col,
SOFA_cardio = SOFA_sub_col,
SOFA_renal = SOFA_sub_col),
show_legend = c(T,F,F,F,F,F),
annotation_legend_param = list(SOFA_total = list(title = "SOFA (total)",
labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
direction = "horizontal",
title_position = "topcenter"
),
SOFA_cns = list(title="SOFA (subcategorical)",
labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
direction = "horizontal",
title_position = "topcenter"))),
width = ncol(.)*unit(1.3, "mm"),
height = nrow(.)*unit(1.6, "mm"),
rect_gp = gpar(col = "grey80", lwd = .2),
border_gp = gpar(col = "black", lty = .5)))
data4_pcaRes_FCmedian <- fc_over_median_M12 %>%
filter(Assay %in% c(dap.res %>% filter(FDR==TRUE, abs(logFC)>1) %>% pull(Assay))) %>%
pivot_wider(values_from = dNPX, names_from = Assay, id_cols = study_id) %>% column_to_rownames("study_id")
## PC calculation
pcaRes_FCmedian <- prcomp(data4_pcaRes_FCmedian, center = TRUE, scale. = TRUE)
varExp_FCmedian <- round(pcaRes_FCmedian$sdev^2 / sum(pcaRes_FCmedian$sdev^2) * 100)
#sum(varExp_FCmedian[1:6])
pcaDF_FCmedian <- data.frame(pcaRes_FCmedian$x) %>%
rownames_to_column("study_id") %>% dplyr::select(1:10) %>%
inner_join(data4_pcaRes_FCmedian %>% rownames_to_column("study_id"), by="study_id")
(pca_FCmedian <- pcaDF_FCmedian %>%
ggplot(aes(x=PC1,y=PC2)) +
geom_point(size=.5) +
my_dimred_theme +
coord_fixed(ratio = 1.75) +
labs(x=paste0("PC1 (",varExp_FCmedian[1],"%)"),
y=paste0("PC2 (",varExp_FCmedian[2],"%)"),
title = "dNPX (delta NPX of acute over convalescence median)",
caption = paste0("# samples: ",dim(data4_pcaRes_FCmedian)[1],
"\n # proteins: ",dim(data4_pcaRes_FCmedian)[2],
"\nlogFC>1")
))
(acute.dnpx.ellbow <- data.frame(PC = 1:10,
varExp = varExp_FCmedian[1:10]) %>%
ggplot(aes(x=PC, y=varExp)) +
scale_y_continuous(breaks = seq(0, 35, by = 5)) +
geom_point(size=.2) +
geom_line(lwd=.2) +
theme_minimal() +
scale_x_continuous(limits=c(1,10), breaks = c(1:10)))
df <- pcaDF_FCmedian %>%
dplyr::select(study_id,PC1:PC6) %>%
column_to_rownames("study_id")
set.seed(2023L)
km <- kmeans(df, centers = 3, nstart = 25,iter.max = 100)
km.res <- data.frame(study_id = rownames(df)) %>% inner_join(data.frame(cluster = km$cluster) %>% rownames_to_column("study_id"), by="study_id")
patient_clust <- km.res %>%
inner_join(subjectTable %>% transmute(study_id, SOFA_total),by="study_id") %>%
group_by(cluster) %>%
summarise(meanSOFA_total = mean(SOFA_total)) %>%
arrange(-meanSOFA_total) %>%
mutate(severity_lab = c("severe","moderate","mild")) %>%
rownames_to_column("rowname") %>%
mutate(rowname = as.numeric(rowname)) %>%
left_join(km.res %>% transmute(study_id,cluster)) %>%
mutate(cluster.orig = fct_reorder(as.factor(cluster),rowname),
severity_lab = fct_reorder(as.factor(severity_lab),rowname),
cluster = fct_reorder(as.factor(rowname),rowname))
patient_clust %>% write_tsv(file = paste0(result.dir,"PatientClustering.tsv"))
patient_clust %>% saveRDS(file = paste0(result.dir,"PatientClustering.rds"))
pca_FCmedian
acute.dnpx.ellbow
(pcaDF_FCmedian_PC1_6_hm <- pcaDF_FCmedian %>%
dplyr::select(study_id,PC1:PC6) %>%
column_to_rownames("study_id") %>%
t() %>%
Heatmap(name="PC value",
column_km = 3,
show_column_names = F,
row_names_gp = gpar(fontsize=6),
row_dend_width = unit(5, "mm"),
column_dend_height = unit(5,"mm"),
column_title_gp = gpar(fontsize = 6),
heatmap_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
legend_height = unit(5, "mm"),
title_position = "topcenter"))
)
df_acute_patclust_incl_conv <- data.long %>%
inner_join(sampleTable_simple %>% dplyr::select(DAid,Time,sample_id,study_id),by="sample_id") %>%
inner_join(patient_clust,by="study_id") %>%
filter(Time=="Acute") %>%
## adding data for M12 time point
bind_rows(data.long %>%
inner_join(sampleTable_simple %>% dplyr::select(DAid,Time,sample_id,study_id),by="sample_id") %>%
inner_join(patient_clust,by="study_id") %>%
filter(Time=="M12") %>%
mutate(severity_lab = "convalescence")) %>%
mutate(severity_lab = factor(as.factor(severity_lab),
levels=c("severe","moderate","mild","convalescence"),
labels=c("severe","moderate","mild","convalescence")))
my_comparisons_severe_conv <- list(c("severe", "moderate"), c("moderate", "mild"), c("severe", "mild"),c("mild","convalescence"))
(liver.tissueleakage.severity.plot <- df_acute_patclust_incl_conv %>%
filter(Assay %in% c("AGXT","HAO1")) %>%
ggplot(aes(x=severity_lab, y=NPX, color=severity_lab, fill=severity_lab)) +
geom_violin(trim = F,alpha=.9) +
geom_jitter(size=0.25,show.legend = F, width = 0.05, alpha=1, color="grey20") +
geom_boxplot(alpha=.7,width=0.25,outlier.shape = NA,color="black", fatten = 2,lwd=.25,show.legend = F) +
stat_compare_means(method = "wilcox.test",
label.sep = "\n",
hide.ns = T,
label = "p.signif" ,
vjust = .5,
size=2,
lwd = .2,
comparisons =my_comparisons_severe_conv,
show.legend = F) +
facet_wrap(~Assay,ncol = 8,scales = "free_y") +
theme_minimal() +
theme(legend.position="bottom",
axis.text.x = element_blank()) +
labs(x="",
color=NULL,
fill=NULL) +
scale_color_manual(values= c(patient_kclust3_lab_conv)) +
scale_fill_manual(values= c(patient_kclust3_lab_conv))
)
(acute.dnpx.pca.clustered <- pcaDF_FCmedian %>%
inner_join(patient_clust) %>%
ggplot(aes(x=PC1,y=PC2, color=cluster)) +
geom_point(size=.5) +
scale_color_manual(values=patient_kclust3) +
labs(color="Cluster",
title="dNPX") +
coord_equal(ratio = 1.5) + theme_minimal())
acute.dnpx.pca.clustered
ggExtra::ggMarginal(acute.dnpx.pca.clustered, type="density",groupColour = TRUE, groupFill = TRUE)
my_comparisons <- list(c("1", "2"), c("2", "3"), c("1", "3"))
(clusters_sofa <- subjectTable %>%
inner_join(patient_clust,by="study_id") %>%
ggplot(aes(x=cluster, y=SOFA_total, color=cluster, fill=cluster)) +
geom_jitter(width = 0.2,show.legend = T, size=0.5,alpha=.7) +
geom_boxplot(alpha=1,width=0.3,color="black",outlier.colour = NA, fatten = 2,lwd=.25,show.legend = F) +
labs(title = paste0("Sequential Organ Failure Assessment (SOFA) score")) +
scale_color_manual(values=patient_kclust3)+
scale_fill_manual(values=patient_kclust3) +
scale_y_continuous(limits = c(0,16)) +
stat_compare_means(method = "wilcox.test",
label.sep = "\n",
hide.ns = T,
label = "p.signif" ,
vjust = .5,
size=2,
lwd = .2,
comparisons =my_comparisons) +
theme_minimal()+
theme(legend.position ="none"))
prot.data.4.corr <- data.long %>%
inner_join(dap.res,by=c("Assay", "UniProt")) %>%
filter(p.adj<=0.05,
abs(logFC)>1,
) %>%
pivot_wider(values_from = NPX, names_from = Assay,id_cols = sample_id) %>%
inner_join(sampleTable_simple %>% filter(Time=="Acute") %>% transmute(sample_id,study_id), by="sample_id") %>%
dplyr::select(-sample_id) %>%
dplyr::select(study_id, everything())
clinical.feat.list <- c("inf_rbc_max", "resp_rate_max","sat", "syst_bp_min",
"p_alat", "p_asat",
"hb_min","wbc_count","plt_count_min","crp_max","bili_max","crea_max","SOFA_cns","SOFA_liver","SOFA_renal","SOFA_coag","SOFA_resp","SOFA_total")
clin.data.4.corr <-
subjectTable %>%
left_join(clinchem_study_pats_acute.wide, by="study_id") %>%
dplyr::select(study_id, all_of(clinical.feat.list))
my_comparisons_severe <- list(c("severe", "moderate"), c("moderate", "mild"), c("severe", "mild"))
df <- clin.data.4.corr %>%
dplyr::select(study_id, clinical.feat.list, -contains("SOFA")) %>%
pivot_longer(cols= -study_id) %>%
inner_join(patient_clust) %>%
na.omit() %>%
group_by(name, severity_lab) %>%
mutate(n_group= as.character(n()),
label_group= factor(paste0('n = ', n_group))) %>%
ungroup() %>%
group_by(name) %>%
mutate(label_pos = min(value),
subcat = case_when(name %in% c("bili_max", "p_alat","p_asat") ~ "Liver function",
name %in% c("hb_min","wbc_count", "plt_count") ~ "Blood cells",
name %in% c("resp_rate_max","sat","syst_bp_min") ~ "Circulation",
.default=NA)) %>%
ungroup() %>%
mutate(name = factor(name, levels = c("crp_max","crea_max","inf_rbc_max",
"hb_min","wbc_count","plt_count_min",
"bili_max","p_asat","p_alat",
"resp_rate_max","sat","syst_bp_min"
)))
single_facet_fun = function(data)(
data %>%
ggplot(aes(x=severity_lab, y=value, fill= severity_lab)) +
geom_violin(trim=F, show.legend = F, width=.6,lwd=.25) +
geom_jitter(size=0.05,width = .1, show.legend = F) +
geom_boxplot(aes(fill=severity_lab),alpha=.7, outlier.shape = NA,width=.2, show.legend = F,lwd=.25) +
theme_bw(base_size = 6) +
scale_y_continuous(expand=c(.2,0))+
facet_grid(~label_name) +
theme(axis.title.x = element_blank()) +
scale_fill_manual(values=patient_kclust3_lab) +
stat_compare_means(method = "wilcox.test",
label.sep = "\n",
hide.ns = T,
label = "p.signif",
vjust = 0.5,
size=2,
comparisons = my_comparisons_severe) +
labs(fill=NULL,
x=NULL) +
scale_x_discrete(labels=data$label_group))
##
p_list <- df %>%
mutate(name = factor(name, levels = c("crp_max","crea_max","inf_rbc_max",
"hb_min","wbc_count","plt_count_min",
"bili_max","p_asat","p_alat",
"resp_rate_max","sat","syst_bp_min"
)),
label_name = case_when(name == "bili_max" ~ "Bilirubin\n(\U003BCmol/L)",
name == "crea_max" ~ "Creatinine\n(\U003BCmol/L)",
name == "crp_max" ~ "CRP\n(mg/L)",
name == "hb_min" ~ "Hemoglobin\n(g/L)",
name == "inf_rbc_max" ~ "Parasitemia\n(%)",
name == "plt_count_min" ~ "Platelet\n(counts)",
name == "sat" ~ "Saturation\n(%)",
name == "p_asat" ~ "AST\n(U/L)",
name == "p_alat" ~ "ALT\n(U/L)",
name == "resp_rate_max" ~ "Respirations rate\n(bpm)",
name == "wbc_count" ~ "White blood cells\n(counts)",
name == "syst_bp_min" ~ "Systolic blood\npressure (mmHg)"
),
label_unit = case_when(name == "bili_max" ~ "unit",
name == "crea_max" ~ "unit",
name == "crp_max" ~ "mg/L",
name == "hb_min" ~ "g/dL",
name == "inf_rbc_max" ~ "%",
name == "plt_count_min" ~ "counts",
name == "sat" ~ "%",
name == "p_asat" ~ "unit",
name == "p_alat" ~ "unit",
name == "resp_rate_max" ~ "bpm",
name == "wbc_count" ~ "counts",
name == "syst_bp_min" ~ "mmHg"
)) %>%
arrange(name) %>%
group_by(name) %>%
nest() %>%
mutate(single_plot = purrr::map(data, single_facet_fun))
clin.data.severity.groups.new <- wrap_plots(p_list$single_plot, ncol=3)
clin.data.severity.groups.new.data <- p_list %>%
unnest(data) %>%
compare_means(
value ~ severity_lab, data = ., group.by = "name",
method = "wilcox.test") %>%
transmute(name, group1, group2, p, p.adj, p.signif, method)
## show plot
clin.data.severity.groups.new
data_nested.patclust <- data.long %>%
inner_join(sampleTable_simple, by="sample_id") %>%
inner_join(patient_clust,by="study_id") %>%
mutate(all_vs_1 = ifelse(cluster.orig %in% c("2","3"),"rest",
ifelse(cluster.orig =="1","1",NA)),
all_vs_2 = ifelse(cluster.orig %in% c("1","3"),"rest",
ifelse(cluster.orig =="2","2",NA)),
all_vs_3 = ifelse(cluster.orig %in% c("2","1"),"rest",
ifelse(cluster.orig =="3","3",NA))) %>%
group_by(UniProt,Assay) %>%
nest()
g_vs_conv <- data_nested.patclust %>%
mutate(lme.res = purrr::map(data, ~ lmer(NPX ~ Time * severity_lab + (1|study_id), REML = F,
data = .x %>% dplyr::filter(Time!="D10"))),
lme.tidy = purrr::map(lme.res, ~ broom.mixed::tidy(.)),
posthoc.time_exposure = purrr::map(lme.res, ~ summary(contrast(emmeans(., ~ Time * severity_lab), method = "pairwise")) %>% tibble())
)
g_vs_conv_padj <- g_vs_conv %>%
unnest(cols="posthoc.time_exposure") %>%
filter(contrast %in%c("Acute severe - M12 severe",
"Acute moderate - M12 moderate",
"Acute mild - M12 mild")) %>%
transmute(Assay, UniProt, contrast, estimate,SE,df,t.ratio,p.value) %>%
ungroup() %>%
group_by(contrast) %>%
mutate(p.adj = p.adjust(p.value, method="fdr"),
FDR = ifelse(p.adj <= 0.01, TRUE,FALSE)) %>%
ungroup() %>%
arrange(p.adj)
(severity_groups_conv_volc <- g_vs_conv_padj %>%
group_by(contrast) %>%
mutate(severity_lab = case_when(grepl("severe",contrast) ~ "severe",
grepl("moderate",contrast) ~ "moderate",
grepl("mild",contrast) ~ "mild",
.default = NA),
severity_lab = factor(severity_lab, levels=c("severe","moderate","mild")),
sig_col = case_when(FDR==T ~ severity_lab,
.default = NA)) %>%
#slice_max(order_by = estimate, n=1) %>%
ggplot(aes(x=severity_lab, y= estimate, color=sig_col)) +
geom_jitter(width=.1,alpha=.2, show.legend = F,size=.5, shape=16) +
ggrepel::geom_text_repel(data= . %>%
group_by(severity_lab) %>% slice_max(n=8,order_by = estimate), aes(label=Assay), show.legend = F,force = .5,
segment.size=0.2,
segment.alpha=.1,
size=1.5,max.overlaps = 15, color="gray35") +
ggrepel::geom_text_repel(data= . %>%
group_by(severity_lab) %>% slice_min(n=8,order_by = estimate), aes(label=Assay), show.legend = F,force = .5,
segment.size=0.2,
segment.alpha=.1,
size=1.5, max.overlaps = 15, color="gray35") +
geom_hline(yintercept=0,
linetype = 3) +
scale_color_manual(values = patient_kclust3_lab,na.value = "grey") +
labs(x=NULL,
title="Each group vs convalecence",
subtitle = "mixed effect model approach - acute_severity vs m12_severity",
caption="FDR < 0.01"))
require(UpSetR) # https://cran.r-project.org/web/packages/UpSetR/vignettes/basic.usage.html
g_vs_conv_padj_tmp.list <- g_vs_conv_padj %>%
group_by(contrast) %>%
mutate(severity_lab = case_when(grepl("severe",contrast) ~ "severe",
grepl("moderate",contrast) ~ "moderate",
grepl("mild",contrast) ~ "mild",
.default = NA),
severity_lab = factor(severity_lab, levels=c("severe","moderate","mild")),
sig_col = case_when(FDR==T ~ severity_lab,
.default = NA)) %>%
filter(estimate>1) %>%
group_by(severity_lab) %>%
summarise(list = list(Assay)) %>%
mutate(list = setNames(list, severity_lab)) %>%
pull(list)
severe_log1 <- intersect(setdiff(g_vs_conv_padj_tmp.list$severe, g_vs_conv_padj_tmp.list$mild),
setdiff(g_vs_conv_padj_tmp.list$severe, g_vs_conv_padj_tmp.list$moderate))
pdf(paste0(result.tmp.dir,"severity_daps_upset.pdf"),width = 7, height = 3)
(UpSetR::upset(fromList(g_vs_conv_padj_tmp.list),
order.by = "freq",point.size = 2,
text.scale = 1.2,
#mb.ratio = c(0.6, 0.4),
sets.bar.color = c("severe" = "#ca0020","moderate" = "#f4a582", "mild" = "#92c5de"),
keep.order = TRUE,
mainbar.y.label = "Number of Proteins",
sets.x.label = "Proteins per group"))
dev.off()
## quartz_off_screen
## 2
library(gtsummary)
(severityTable <- subjectTable %>%
mutate(wbc_count = as.numeric(wbc_count),
sat = as.numeric(sat)) %>%
left_join(patient_clust, by="study_id") %>%
tbl_summary(include = c(inf_rbc_max,
crp_max,
bili_max,
crea_max,
sat,
resp_rate_max,
syst_bp_min,
plt_count_min,
hb_min),
by = severity_lab, # split table by group
statistic = list(
all_continuous() ~ "{median} ({min}-{max})",
all_categorical() ~ "{n} / {N} ({p}%)"
),
digits = all_continuous() ~ 2,
missing_text = "(Missing)") %>%
add_n() %>% # add column with total number of non-missing observations
add_p() %>% # test for a difference between groups
modify_header(label = "**Variable**") %>% # update the column header
bold_labels())
Variable | N | severe, N = 171 | moderate, N = 361 | mild, N = 191 | p-value2 |
---|---|---|---|---|---|
inf_rbc_max | 72 | 3.00 (0.01-8.00) | 0.80 (0.10-5.00) | 0.20 (0.01-1.80) | 0.003 |
crp_max | 72 | 208.00 (96.00-326.00) | 156.50 (28.00-381.00) | 101.00 (14.00-246.00) | 0.002 |
bili_max | 64 | 25.00 (12.00-135.00) | 21.00 (10.00-100.00) | 19.00 (5.00-173.00) | 0.3 |
    (Missing) | 0 | 4 | 4 | ||
crea_max | 72 | 108.00 (68.00-828.00) | 95.00 (47.00-193.00) | 86.00 (49.00-133.00) | 0.3 |
sat | 72 | 0.8 | |||
    92 | 0 / 17 (0%) | 0 / 36 (0%) | 2 / 19 (11%) | ||
    93 | 0 / 17 (0%) | 2 / 36 (5.6%) | 0 / 19 (0%) | ||
    94 | 0 / 17 (0%) | 1 / 36 (2.8%) | 0 / 19 (0%) | ||
    95 | 2 / 17 (12%) | 4 / 36 (11%) | 2 / 19 (11%) | ||
    96 | 1 / 17 (5.9%) | 4 / 36 (11%) | 0 / 19 (0%) | ||
    97 | 2 / 17 (12%) | 5 / 36 (14%) | 4 / 19 (21%) | ||
    98 | 6 / 17 (35%) | 10 / 36 (28%) | 6 / 19 (32%) | ||
    99 | 4 / 17 (24%) | 4 / 36 (11%) | 4 / 19 (21%) | ||
    100 | 2 / 17 (12%) | 6 / 36 (17%) | 1 / 19 (5.3%) | ||
resp_rate_max | 72 | 20.00 (14.00-48.00) | 20.00 (12.00-32.00) | 18.00 (13.00-42.00) | 0.3 |
syst_bp_min | 72 | 100.00 (70.00-120.00) | 106.50 (70.00-139.00) | 108.00 (76.00-125.00) | 0.13 |
plt_count_min | 71 | 32.00 (14.00-112.00) | 63.00 (28.00-134.00) | 99.00 (51.00-183.00) | <0.001 |
    (Missing) | 0 | 0 | 1 | ||
hb_min | 72 | 108.00 (64.00-157.00) | 122.00 (66.00-165.00) | 113.00 (66.00-159.00) | 0.2 |
1 Median (Minimum-Maximum); n / N (%) | |||||
2 Kruskal-Wallis rank sum test; Fisher’s exact test |
subjectTable %>%
mutate(wbc_count = as.numeric(wbc_count),
sat = as.numeric(sat)) %>%
left_join(patient_clust, by="study_id") %>%
transmute(study_id,
severity_lab,
diff_acuteSample_treatment,diff_acuteSample_treatment.abs,
diff_acuteSample_spt_current,diff_acuteSample_spt_current.abs) %>%
pivot_longer(cols = -c(study_id,severity_lab)) %>%
compare_means(
value ~ severity_lab, data = ., group.by = "name",
method = "wilcox.test")
## # A tibble: 12 × 9
## name .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 diff_acuteSample_tr… value severe moder… 0.329 1 0.33 ns Wilco…
## 2 diff_acuteSample_tr… value severe mild 0.289 1 0.29 ns Wilco…
## 3 diff_acuteSample_tr… value moder… mild 0.657 1 0.66 ns Wilco…
## 4 diff_acuteSample_tr… value severe moder… 0.220 1 0.22 ns Wilco…
## 5 diff_acuteSample_tr… value severe mild 0.289 1 0.29 ns Wilco…
## 6 diff_acuteSample_tr… value moder… mild 0.801 1 0.80 ns Wilco…
## 7 diff_acuteSample_sp… value severe moder… 0.904 1 0.90 ns Wilco…
## 8 diff_acuteSample_sp… value severe mild 0.335 1 0.34 ns Wilco…
## 9 diff_acuteSample_sp… value moder… mild 0.219 1 0.22 ns Wilco…
## 10 diff_acuteSample_sp… value severe moder… 0.904 1 0.90 ns Wilco…
## 11 diff_acuteSample_sp… value severe mild 0.335 1 0.34 ns Wilco…
## 12 diff_acuteSample_sp… value moder… mild 0.219 1 0.22 ns Wilco…
#clin.data.severity.groups.new.data %>%
# write_tsv(paste0(result.dir,"Supplementary_TableS4_ClinicalChemistry_severity_groups.tsv"))
clin.data.severity.groups.new.data %>% head()
## # A tibble: 6 × 7
## name group1 group2 p p.adj p.signif method
## <fct> <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 crp_max severe moderate 0.0191 0.53 * Wilcoxon
## 2 crp_max severe mild 0.000781 0.027 *** Wilcoxon
## 3 crp_max moderate mild 0.0860 1 ns Wilcoxon
## 4 crea_max severe moderate 0.281 1 ns Wilcoxon
## 5 crea_max severe mild 0.124 1 ns Wilcoxon
## 6 crea_max moderate mild 0.381 1 ns Wilcoxon
prot.input <- prot.data.4.corr %>% column_to_rownames("study_id")
clin.input <- clin.data.4.corr %>% dplyr::select(study_id,c(plt_count_min,inf_rbc_max,crp_max,hb_min,bili_max,crea_max,p_alat,p_asat)) %>% #,contains("SOFA")) %>%
column_to_rownames("study_id")
cor.res <- prot.input[rownames(clin.input),] %>%
correlation::correlation(data2 = clin.input,
method = "spearman",
redundant = F,
p_adjust = "fdr") %>%
tibble()
df <- cor.res %>%
filter(n_Obs >= 37,
p<=0.05,
abs(rho)>=0.45
) %>%
transmute(from=Parameter2,
to=Parameter1,
value=rho) %>%
group_by(to) %>%
mutate(n_prot =n()) %>%
ungroup() %>%
group_by(from) %>%
mutate(n_clin = n()) %>%
ungroup() %>%
arrange(desc(n_prot),n_clin) %>%
mutate(from = case_when(from=="crp_max"~"CRP",
from=="p_alat"~"ALT",
from=="p_asat"~"AST",
from=="plt_count_min"~"Platelets",
from=="inf_rbc_max" ~"Parasitemia",
from=="bili_max"~"Bilirubin",
from=="hb_min" ~"Hemoglobin",
from=="crea_max"~"Creatinine",
.default=from))
df
## # A tibble: 171 × 5
## from to value n_prot n_clin
## <chr> <chr> <dbl> <int> <int>
## 1 CRP CTSL 0.471 3 22
## 2 CRP ICAM1 0.489 3 22
## 3 CRP KYNU 0.467 3 22
## 4 CRP PTS 0.462 3 22
## 5 CRP PVR 0.450 3 22
## 6 ALT CTSL 0.463 3 36
## 7 ALT ICAM1 0.557 3 36
## 8 ALT KYNU 0.467 3 36
## 9 ALT PTS 0.525 3 36
## 10 ALT PVR 0.528 3 36
## # ℹ 161 more rows
#string <- unique(df$from)
#string <- setdiff(unique(df$from),names(clin_marker_cols))
#col.grid_clin <- setNames(sample(brewer.pal(length(string),name="Set1")),string)
string_proteins <- unique(df$to)
col.grid.prot <- setNames(rep("grey80",length(string_proteins)), string_proteins)
col.grid <- c(clin_marker_cols,
#col.grid_clin,
col.grid.prot)
## highlight
# three-column data frame in which the first two columns correspond to row names and column names in the matrix, and the third column corresponds to the graphic parameters
border_df = data.frame(c("Parasitemia"), c("CALCA"), c(1))
pdf(paste0(result.tmp.dir,"chordDiagram.pdf")) #width 6.9
circos.par(gap.after = c(rep(1, length(unique(df[[1]]))-1), 15,
rep(1, length(unique(df[[2]]))-1), 15))
chordDiagram(df,
# big.gap = 25,
grid.col = col.grid,
#annotationTrack = "grid",
big.gap = 10,
small.gap = 1,
link.border = border_df,
annotationTrack = NULL,
preAllocateTracks = list(track.height = .1))#max(strwidth(unlist(dimnames(df))))))
circos.track(track.index = 1, panel.fun = function(x, y) {
circos.text(CELL_META$xcenter,
CELL_META$ylim[1],
CELL_META$sector.index,
facing = "clockwise",
cex = 0.6,
niceFacing = TRUE,
adj = c(0, 0.9))
},
bg.border = NA)
#
dev.off()
## quartz_off_screen
## 2
circos.clear()
Identification of severity-associated plasma proteomic profiles
## WGCNA
## https://bioinformaticsworkbook.org/tutorials/wgcna.html#gsc.tab=0
#install.packages("BiocManager")
#BiocManager::install("WGCNA")
## data wrangling
selected.assays.wcna <- dap.res %>% filter(p.adj <= 0.01) %>% pull(Assay)
## requires: rows = treatments and columns = gene probes
input_mat <- data.wide %>%
inner_join(sampleTable_simple %>% dplyr::select(DAid,study_id,Time, sample_id),by="sample_id") %>%
inner_join(subjectTable %>% dplyr::select(study_id),by="study_id") %>%
filter(Time=="Acute") %>%
column_to_rownames("sample_id") %>%
## restricting to proteins, significant abundant over convalescence (m12 samples)
dplyr::select(selected.assays.wcna) %>%
as.matrix() %>%
scale()
input_mat[1:5,1:10]
## TNFRSF8 IL10 CXCL9 CD74 TNFRSF1B
## 2011PT01|Acute -0.7370011 -1.1852994 -1.4157459 0.08771475 -1.9048313
## 2011PT04|Acute -0.6779312 0.3667134 -0.0943544 -0.19212623 0.2678492
## 2011PT05|Acute 0.1403045 1.0528265 0.5657621 1.29297919 0.3504350
## 2011PT06|Acute 0.2332304 -1.7527932 -1.0945056 -0.41377242 -0.4221094
## 2011PT07|Acute 0.9822293 1.4187290 0.7498430 1.43859264 1.2466066
## VCAM1 PLA2G2A IL18BP CSF1 B4GALT1
## 2011PT01|Acute -1.1412591 -1.01940457 -1.4608293 -1.6021606 -0.54555371
## 2011PT04|Acute -0.3286294 0.09020523 -0.5969576 -0.3028854 -0.45244260
## 2011PT05|Acute 0.4595043 0.74644622 0.5612046 0.1410837 1.00463269
## 2011PT06|Acute -0.2196398 0.29812935 -0.9891447 -0.5298451 0.04218177
## 2011PT07|Acute 0.9852817 1.00324357 1.1329858 1.2435433 1.42461678
dim(input_mat)
## [1] 72 703
Set up
allowWGCNAThreads() # allow multi-threading (optional)
## Allowing multi-threading with up to 10 threads.
#> Allowing multi-threading with up to 4 threads.
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to = 20, by = 2))
# Call the network topology analysis function
sft = pickSoftThreshold(input_mat, # <= Input data
#blockSize = 30,
powerVector = powers,
verbose = 5
)
## pickSoftThreshold: will use block size 703.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 703 of 703
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.0295 -0.368 0.716 162.0000 1.62e+02 289.00
## 2 2 0.4720 -1.210 0.864 58.7000 5.39e+01 158.00
## 3 3 0.6750 -1.480 0.913 26.3000 2.17e+01 97.20
## 4 4 0.7360 -1.680 0.962 13.4000 9.78e+00 63.90
## 5 5 0.7860 -1.700 0.969 7.4900 4.66e+00 44.00
## 6 6 0.8160 -1.740 0.958 4.4600 2.44e+00 31.30
## 7 7 0.8150 -1.770 0.918 2.7900 1.34e+00 22.90
## 8 8 0.8560 -1.770 0.973 1.8200 7.88e-01 17.10
## 9 9 0.8840 -1.780 0.968 1.2200 4.80e-01 13.00
## 10 10 0.8860 -1.790 0.940 0.8480 2.91e-01 10.10
## 11 12 0.2960 -2.610 0.144 0.4370 1.13e-01 6.27
## 12 14 0.3000 -2.550 0.150 0.2430 4.86e-02 4.08
## 13 16 0.3440 -3.540 0.322 0.1440 2.05e-02 2.74
## 14 18 0.3370 -3.380 0.298 0.0904 8.94e-03 1.91
## 15 20 0.3340 -3.270 0.307 0.0595 4.31e-03 1.43
#sft$powerEstimate
#### Scale independence & mean connectivity
sft_tibble <- as_tibble(sft$fitIndices)
plot.si <- sft_tibble %>%
ggplot(aes(x=Power,
y=-sign(slope)*SFT.R.sq)) +
geom_point() +
geom_label(aes(label=Power)) +
geom_hline(yintercept = 0.9, color="darkred") +
theme_minimal() +
labs(title = "Scale independence",
y="Scale Free Topology Model fit\n signed R^2",
x= "Soft Threshold (power")
plot.meank <- sft_tibble %>%
ggplot(aes(x=Power,
y=mean.k.)) +
geom_label(aes(label=Power)) +
theme_minimal() +
labs(title="Mean connectivity",
x="Soft Threshold (power)",
y="Mean Connectivity")
plot.si + plot.meank
##Build co-expression network
picked_power = 6#sft$powerEstimate#6
temp_cor <- cor
cor <- WGCNA::cor # Force it to use WGCNA cor function (fix a namespace conflict issue)
netwk <- blockwiseModules(input_mat, # <= input here
# == Adjacency Function ==
power = picked_power, # <= power here
networkType = "signed",#"signed hybrid",#"signed",
# == Tree and Block Options ==
deepSplit = 4, #sensitive module detection should be to module splitting, 0 least and 4 most sensitive
pamRespectsDendro = F,
# detectCutHeight = 0.75,
minModuleSize = 30,
maxBlockSize =ncol(input_mat),#4000,
# == Module Adjustments ==
reassignThreshold = 0,
mergeCutHeight = 0.25,
# == TOM == Archive the run results in TOM file (saves time)
saveTOMs = T,
saveTOMFileBase = "ER",
# == Output Options
numericLabels = T,
verbose = 3)
## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will use 10 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 1 into file ER-block.1.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 82 genes from module 1 because their KME is too low.
## ..removing 5 genes from module 2 because their KME is too low.
## ..removing 13 genes from module 3 because their KME is too low.
## ..removing 2 genes from module 6 because their KME is too low.
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.25
## Calculating new MEs...
cor <- temp_cor # Return cor function to original namespace
##### Cluster Dendrogram
# Convert labels to colors for plotting
mergedColors = labels2colors(netwk$colors)
# Plot the dendrogram and the module colors underneath
(cluster_dendro <- plotDendroAndColors(
netwk$dendrograms[[1]],
mergedColors[netwk$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE,
hang = 0.03,
addGuide = TRUE,
guideHang = 0.05 ))
## $mar
## [1] 1 5 0 1
module_df <- data.frame(
assay_id = names(netwk$colors),
Assay = gsub("\\_.*","",names(netwk$colors)),
colors = labels2colors(netwk$colors)
)
module_df[1:5,]
## assay_id Assay colors
## 1 TNFRSF8 TNFRSF8 turquoise
## 2 IL10 IL10 brown
## 3 CXCL9 CXCL9 brown
## 4 CD74 CD74 brown
## 5 TNFRSF1B TNFRSF1B turquoise
tmp <- unique(module_df$colors)
module.cols <- setNames(tmp, tmp)
## how many proteis in each module
(module_overview <- module_df %>%
group_by(colors) %>%
count() %>%
ggplot(aes(x = fct_reorder(colors,-n), y = n, fill = colors, label = n)) +
geom_bar(stat = "summary", position = "dodge", show.legend = F) +
geom_text(stat = "sum", vjust = -0.5,show.legend = F, size=2) +
scale_y_continuous(#limits=c(0,300),
expand = c(0, 20)) +
scale_x_discrete(expand = c(0,-1)) +
scale_fill_manual(values=module.cols) +
theme_minimal() +
theme(axis.text.x = element_blank(),
axis.text = element_text(size=6),
axis.title = element_text(size=6),
axis.ticks.x = element_blank()
) +
labs(title = "WGCNA analysis - protein network modules",
subtitle = paste0("based on ",dim(input_mat)[2]," proteins (differential abundant proteins)\n",
"profiled in ",dim(input_mat)[1]," acute malaria samples"),
x="WGCNA protein modules",
y="n Proteins")
)
#### generate and export networks for all modules
assays_of_interest = module_df #%>%
#subset(colors %in% c("turquoise"))
npx_of_interest = input_mat[,assays_of_interest$assay_id]
npx_of_interest[1:5,1:5]
## TNFRSF8 IL10 CXCL9 CD74 TNFRSF1B
## 2011PT01|Acute -0.7370011 -1.1852994 -1.4157459 0.08771475 -1.9048313
## 2011PT04|Acute -0.6779312 0.3667134 -0.0943544 -0.19212623 0.2678492
## 2011PT05|Acute 0.1403045 1.0528265 0.5657621 1.29297919 0.3504350
## 2011PT06|Acute 0.2332304 -1.7527932 -1.0945056 -0.41377242 -0.4221094
## 2011PT07|Acute 0.9822293 1.4187290 0.7498430 1.43859264 1.2466066
## columns: Assays
## rows: sample_id
TOM = TOMsimilarityFromExpr(npx_of_interest,
power = picked_power)
## TOM calculation: adjacency..
## ..will use 10 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Add gene names to row and columns
row.names(TOM) = colnames(npx_of_interest)
colnames(TOM) = colnames(npx_of_interest)
edge_list = data.frame(TOM) %>%
rownames_to_column("Assay1") %>%
pivot_longer(cols=-Assay1,names_to = "Assay2",values_to = "adjacency") %>%
distinct() %>%
filter(Assay1!=Assay2) %>%
right_join(module_df %>% transmute(module1 = colors,
Assay1 = Assay)) %>%
right_join(module_df %>% transmute(module2 = colors,
Assay2 = Assay)) %>%
na.omit()
##Heatmap - protein adjacency
mat <- cor(npx_of_interest, method = "pearson")
row.anno.df <- data.frame(assay_id = rownames(mat)) %>% left_join(module_df) %>% dplyr::rename(Module = colors)
(assay_adj_hm <- mat %>%
Heatmap(name="protein-protein correlation r",
right_annotation = HeatmapAnnotation(df = row.anno.df %>% transmute(Module),
col = list(Module = module.cols),
which = "row",
simple_anno_size = unit(1, "mm"),
show_annotation_name = F,
annotation_name_rot = 0,
annotation_name_gp = gpar(fontsize=6),
annotation_name_side = "top",
show_legend = F,
annotation_legend_param = list(title_gp = gpar(fontsize = 6),
labels_gp = gpar(fontsize = 6))),
row_split = row.anno.df$Module,
column_split = row.anno.df$Module,
row_gap = unit(0, "mm"),
column_gap = unit(0, "mm"),
row_dend_width = unit(3, "mm"),
row_dend_gp = gpar(lwd=.1),
column_dend_gp = gpar(lwd=.1),
column_dend_height = unit(3, "mm"),
border = TRUE,
border_gp = gpar(lwd=.1),
column_title = NULL,
row_title = NULL,
show_row_names = F,
show_column_names = F,
heatmap_legend_param = list(labels_gp = gpar(fontsize = 6),
title_gp = gpar(fontsize = 6),
direction = "horizontal",
legend_width = unit(2, "cm"),
grid_height = unit(.2, "cm"))
))
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(input_mat, mergedColors)$eigengenes
# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)
# Add treatment names
#MEs0$DAid <- row.names(MEs0)
# tidy data
mME <- MEs0 %>%
rownames_to_column("sample_id") %>%
pivot_longer(names_to = "module", values_to = "module_eigengenes" ,cols = -sample_id) %>%
separate(sample_id, "\\|", into = c("study_id","Time"),remove = F) %>%
mutate(module = gsub("ME", "", module),
module = factor(module, levels = module_order)) %>%
inner_join(subjectTable,
by="study_id")
## Module-trait relationship
corr_module_eigengene_meta_res <- mME %>%
dplyr::select(module, module_eigengenes,contains("SOFA")) %>%
group_by(module) %>%
correlation(p_adjust = "fdr")
df <- tibble(corr_module_eigengene_meta_res) %>%
filter(Parameter1=="module_eigengenes") %>%
mutate(r4fill = case_when(p>0.05 ~ 0,
.default=r))
(module_trait_plot <- ggplot(data = df,
aes(x=Parameter2, y=Group, fill=r4fill,lable=r)) +
geom_tile() +
geom_text(aes(label = paste0(r %>% round(2)),
color = ifelse(r4fill==0, "grey20", "black")),
size=1) +
scale_colour_identity() +
theme_bw() +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "white",
midpoint = 0,
limit = c(-1,1)) +
labs(title = "Module-trait Relationships",
y="WGCNA modules",
x = NULL,
fill="rho") +
theme(#axis.text.y = element_text(color= rev(c("yellow","turquoise","red","grey","green","brown","blue"))),
axis.text.y = element_text(color= rev(c("yellow","turquoise","red","grey","green","brown","blue"))),
axis.text.x = element_text(angle=45, hjust=1)))
#### Module membership
### Get Module Eigengenes per cluster
MEs <- moduleEigengenes(input_mat, mergedColors)$eigengenes
## calculate Module membership
geneModuleMembership <- as.data.frame(cor(input_mat, MEs, use = "p"))
geneModuleMembership.tidy <- geneModuleMembership %>%
rownames_to_column("Assay") %>%
pivot_longer(cols = -Assay) %>%
transmute(Assay,
Module = str_remove(name, "ME"),
gMM = value)
## calculate pvalue for geneModuleMembership
MMPvalue.tidy <- as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nrow(input_mat))) %>%
rownames_to_column("Assay") %>%
pivot_longer(cols = -Assay) %>%
transmute(Assay,
Module = str_remove(name, "ME"),
pvalue = value)
gMM.tidy <- geneModuleMembership.tidy %>% right_join(MMPvalue.tidy,by=c("Assay","Module"))
module_specific_MM <- module_df %>%
transmute(Assay,
Module = colors) %>%
inner_join(gMM.tidy)
(turquoise_module_restrictions_density <- module_specific_MM %>%
mutate("-log10(pvalue)" = -log10(pvalue)) %>%
pivot_longer(cols = c(gMM,"-log10(pvalue)")) %>%
mutate(name_label = case_when(name=="gMM"~"threshold > 0.6",
name=="-log10(pvalue)"~"threshold < 0.05",
.default = NA),
name_label = factor(name_label),
cutoff = case_when(name=="gMM"~ 0.6,#0.75,
name=="-log10(pvalue)"~ -log10(0.05),
.default = NA),
cutoff_pos_y = case_when(name=="gMM"~ 1,
name=="-log10(pvalue)"~ 0.04,
.default = NA)
) %>%
ggplot(aes(x=value)) +
geom_density(show.legend = F,linewidth=.2,fill="turquoise") +
theme_minimal() +
facet_wrap(~name, ncol = 1,labeller = labeller(name=c("gMM" = "Module membership", "-log10(pvalue)" = "-log10(pvalue)")),scales = "free") +
geom_vline(aes(xintercept=cutoff),linetype="dashed") +
geom_text(aes(x=cutoff,
y=cutoff_pos_y,
label=name_label),
size=1,
check_overlap = TRUE) +
labs(y="density",
x=""))
restricted_module_turquoise <- module_specific_MM %>%
filter(Module=="turquoise",
gMM > 0.6,#0.75,
pvalue < 0.05
) %>%
arrange(-pvalue) %>%
pull(Assay)
length(restricted_module_turquoise)
## [1] 158
data.frame(Assay = restricted_module_turquoise) %>%
left_join(dap.res %>%
transmute(Assay, UniProt)) %>%
transmute(UniProt) %>%
write_tsv(paste0(result.tmp.dir,"restricted_module_turquoise.tsv"))
#head()
##Online reactome
#reactome_result<- read_delim("../Manuscript/20250226_restricted_module_turquoise_ReactomeORA_Result.txt") %>% janitor::clean_names()
reactome_result<- read_delim("../Manuscript/20250325_restricted_module_turquoise_ReactomeORA_Result.txt") %>% janitor::clean_names()
(reactome_ora <- reactome_result %>%
arrange(entities_fdr) %>%
head(n=10) %>%
mutate(facet_lab = "turquoise module") %>%
ggplot(aes(x=fct_reorder(pathway_name,-log10(entities_fdr)),
y=-log10(entities_fdr))) +
geom_bar(stat = "identity", width = 0.1) +
geom_point(aes(color=-log10(entities_fdr)),
size=2) +
geom_text(aes(label=number_entities_found),
size=2, nudge_y = .1, color="black")+
scale_y_continuous(trans="log10") +
scale_x_discrete(labels = function(x) str_wrap(x, width = 45)) +
scale_color_viridis() +
theme_minimal() +
theme(text = element_text(size=6 ),
axis.text.y = element_text(size = 6),
axis.ticks.x = element_blank()) +
coord_flip() +
facet_grid(~facet_lab) +
guides(size = guide_legend(reverse=TRUE),
) +
labs(title = "Reactome database v86",
color="-log10\n(FDR)",
y="-log10(FDR)",
x=NULL)
)
wrapper <- function(x, ...)
{
paste(strwrap(x, ...), collapse = "\n")
}
a = "Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell"
df <- reactome_result %>%
arrange(entities_fdr) %>%
head(n=10) %>%
filter(pathway_name==a) %>%
transmute(pathway_name, submitted_entities_found) %>%
separate_rows(submitted_entities_found, sep=";\\s*") %>%
left_join(
data.long %>% inner_join(sampleTable_simple, by="sample_id") %>% transmute(Assay,NPX,UniProt,sample_id,study_id,Time) %>% filter(Time=="Acute"),
by=c("submitted_entities_found"="UniProt")
) %>%
inner_join(patient_clust,by="study_id")
(reactome_ora_IIBLNL <- df %>% group_by(Assay, severity_lab) %>%
summarise(NPXmean = mean(NPX),
NPXmedian = median(NPX),
NPXsd = sd(NPX),
NPXn = n(),
NPXse = NPXsd / sqrt(NPXn)
) %>%
mutate(NPXci95 = NPXse * qt(.975, NPXn - 1)) %>%
ggplot(aes(x=Assay, y=NPXmean, group=severity_lab, color=severity_lab)) +
geom_point(size=.25) +
geom_polygon(fill=NA, show.legend = F, lwd=0.2) +
geom_errorbar(aes(x = Assay,
ymin=NPXmean-NPXci95,
ymax=NPXmean+NPXci95, ),
linewidth=.5,
width=.2,
alpha=.5) +
# Make it circular!
coord_polar(clip = "off") +
theme_minimal() +
labs(x="",
y="mean (NPX) +- 95% CI",
title = wrapper(a, width = 40),
color="mean (NPX) +- 95% CI") +
scale_color_manual(values=patient_kclust3_lab) +
# Annotate the bars and the lollipops so the reader understands the scaling
annotate(x = 0, y = 0, label = "0", fontface =2, geom = "text", color = "gray12", size = 1.5) +
annotate(x = 0, y = 1, label = "1", fontface =2, geom = "text", color = "gray12", size = 1.5) +
annotate(x = 0, y = 2, label = "2", fontface =2, geom = "text", color = "gray12", size = 1.5) +
annotate(x = 0, y = 3, label = "3", fontface =2, geom = "text", color = "gray12", size = 1.5) +
theme(
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(color = "gray12", size = 4),
legend.position = "right",
legend.text = element_text(size=6),
legend.title = element_text(size=6),
title = element_text(size=5, face='bold'),
plot.title = element_text(hjust = 0.5))
)
a = "Neutrophil degranulation"
df <- reactome_result %>%
arrange(entities_fdr) %>%
head(n=10) %>%
filter(pathway_name==a) %>%
transmute(pathway_name, submitted_entities_found) %>%
separate_rows(submitted_entities_found, sep=";\\s*") %>%
left_join(
data.long %>% inner_join(sampleTable_simple, by="sample_id") %>% transmute(Assay,NPX,UniProt,sample_id,study_id,Time) %>% filter(Time=="Acute"),
by=c("submitted_entities_found"="UniProt")
) %>%
inner_join(patient_clust,by="study_id")
(reactome_ora_ND <- df %>% group_by(Assay, severity_lab) %>%
summarise(NPXmean = mean(NPX),
NPXmedian = median(NPX),
NPXsd = sd(NPX),
NPXn = n(),
NPXse = NPXsd / sqrt(NPXn)
) %>%
mutate(NPXci95 = NPXse * qt(.975, NPXn - 1)) %>%
ggplot(aes(x=Assay, y=NPXmean, group=severity_lab, color=severity_lab)) +
geom_point(size=.25) +
geom_polygon(fill=NA, show.legend = F, lwd=0.2) +
geom_errorbar(aes(x = Assay,
ymin=NPXmean-NPXci95,
ymax=NPXmean+NPXci95, ),
linewidth=.5,
width=.2,
alpha=.5) +
# Make it circular!
coord_polar(clip = "off") +
theme_minimal() +
labs(x="",
title = wrapper(a, width = 40),
color="mean (NPX) +- 95% CI") +
scale_color_manual(values=patient_kclust3_lab) +
# Annotate the bars and the lollipops so the reader understands the scaling
annotate(x = 0, y = 0, label = "0", fontface =2, geom = "text", color = "gray12", size = 1.5) +
annotate(x = 0, y = 1, label = "1", fontface =2, geom = "text", color = "gray12", size = 1.5) +
annotate(x = 0, y = 2, label = "2", fontface =2, geom = "text", color = "gray12", size = 1.5) +
annotate(x = 0, y = 3, label = "3", fontface =2, geom = "text", color = "gray12", size = 1.5) +
theme(
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(color = "gray12", size = 4),
legend.position = "right",
legend.text = element_text(size=6),
legend.title = element_text(size=6),
title = element_text(size=5, face='bold'),
plot.title = element_text(hjust = 0.5))
)
a = "TNFR2 non-canonical NF-kB pathway"
df <- reactome_result %>%
arrange(entities_fdr) %>%
head(n=10) %>%
filter(pathway_name==a) %>%
transmute(pathway_name, submitted_entities_found) %>%
separate_rows(submitted_entities_found, sep=";\\s*") %>%
left_join(
data.long %>% inner_join(sampleTable_simple, by="sample_id") %>% transmute(Assay,NPX,UniProt,sample_id,study_id,Time) %>% filter(Time=="Acute"),
by=c("submitted_entities_found"="UniProt")
) %>%
inner_join(patient_clust,by="study_id")
(reactome_ora_TNFR2 <- df %>% group_by(Assay, severity_lab) %>%
summarise(NPXmean = mean(NPX),
NPXmedian = median(NPX),
NPXsd = sd(NPX),
NPXn = n(),
NPXse = NPXsd / sqrt(NPXn)
) %>%
mutate(NPXci95 = NPXse * qt(.975, NPXn - 1)) %>%
ggplot(aes(x=Assay, y=NPXmean, group=severity_lab, color=severity_lab)) +
geom_point(size=.25) +
geom_polygon(fill=NA, show.legend = F, lwd=0.2) +
geom_errorbar(aes(x = Assay,
ymin=NPXmean-NPXci95,
ymax=NPXmean+NPXci95),
linewidth=.5,
width=.2,
alpha=.5) +
# Make it circular!
coord_polar(clip = "off") +
theme_minimal() +
labs(x=NULL,
color="mean (NPX) +- 95% CI",
title = wrapper(a, width = 40)) +
scale_color_manual(values=patient_kclust3_lab) +
# Annotate the bars and the lollipops so the reader understands the scaling
annotate(x = 0, y = 0, label = "0", fontface =2, geom = "text", color = "gray12", size = 1.5) +
annotate(x = 0, y = 1, label = "1", fontface =2, geom = "text", color = "gray12", size = 1.5) +
annotate(x = 0, y = 2, label = "2", fontface =2, geom = "text", color = "gray12", size = 1.5) +
annotate(x = 0, y = 3, label = "3", fontface =2, geom = "text", color = "gray12", size = 1.5) +
annotate(x = 0, y = 4, label = "4", fontface =2, geom = "text", color = "gray12", size = 1.5) +
theme(
axis.ticks = element_blank(),
axis.title = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(color = "gray12", size = 4),
legend.position = "right",
legend.text = element_text(size=6),
legend.title = element_text(size=6),
title = element_text(size=5, face='bold'),
plot.title = element_text(hjust = 0.5))
)
Severity-associated profiles of condensed 11- protein signature in malaria and other febrile infections.
library(mixOmics)
data.mo <- df_acute_patclust_incl_conv %>%
filter(Assay %in% restricted_module_turquoise) %>%
pivot_wider(names_from = Assay, values_from = NPX, id_cols=c(severity_lab,sample_id)) %>%
column_to_rownames("sample_id")
X <- data.mo %>% dplyr::select(-c(severity_lab))
Y <- data.mo$severity_lab
#####
data.pca <- mixOmics::pca(X, ncomp=10, center = TRUE, scale = TRUE)
plot(data.pca)
plotIndiv(data.pca,
group = Y,
ind.names = F,
legend = TRUE,
col.per.group = patient_kclust3_lab_conv,
title = 'PCA on all NPX data')
#####
### PLS-Discriminant Analysis based on severity groups
data.plsda <- mixOmics::plsda(X, Y, ncomp = 10)
# takes a couple of minutes to run
perf.data.plsda <- perf(data.plsda,
validation = "Mfold",
folds = 5,
progressBar = F,
auc = TRUE,
nrepeat = 10) #100
################
(plot(perf.data.plsda, col = color.mixo(1:3), sd = TRUE, legend.position = "horizontal"))
## NULL
######
list.keepX <- c(1:10) # grid of possible keepX values that will be tested for each component
tune.splsda <- tune.splsda(X,
Y,
ncomp = 3,
validation = "Mfold",
folds = 5,
progressBar = TRUE,
dist = "centroids.dist",
measure = "BER",
test.keepX = list.keepX,
nrepeat = 10,
cpus = 8)
##
## comp 1
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
## comp 2
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
## comp 3
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
#####
## The classification error rates for each component conditional on the last component are represented below, for all components specified in the tune function.
plot(tune.splsda)
error <- tune.splsda$error.rate # error rate per component for the keepX grid
ncomp <- tune.splsda$choice.ncomp$ncomp
ncomp
## [1] 2
ncomp = 2
#####
select.keepX <- tune.splsda$choice.keepX[1:ncomp] # optimal number of variables to select
select.keepX
## comp1 comp2
## 7 4
#####
splsda.data <- mixOmics::splsda(X, Y, ncomp = ncomp, keepX = select.keepX)
#####
plotIndiv(splsda.data,
comp = c(1,2),
group = Y,
ind.names = F,
ellipse = TRUE,
#col.per.group = patient_kclust3_lab,
legend = T,
title = 'sPLS-DA on data, comp 1 & 2')
plotLoadings(splsda.data, comp = 1,ndisplay=20, title = 'Loadings on comp 1',legend.color = patient_kclust3_lab_conv,
contrib = 'max', method = 'mean')
plotLoadings(splsda.data, comp = 2,ndisplay = 10, title = 'Loadings on comp 2', legend.color = patient_kclust3_lab_conv,
contrib = 'max', method = 'mean')
auc.splsda <- auroc(splsda.data, roc.comp = 2, print = FALSE) # AUROC for the first component
auc.splsda$graph.Comp1
## NULL
auroc(splsda.data, roc.comp = 1, print = FALSE)
splsda.kclust.clusters <- plotIndiv(splsda.data,
comp = c(1,2),
group = Y,
ind.names = F,
ellipse = TRUE,
col.per.group = patient_kclust3_lab_conv,
legend = F,
title = 'sPLS-DA on data, comp 1 & 2')
(splsda.kclust.clusters.ggplot <- splsda.kclust.clusters$df %>%
mutate(group = factor(group, levels=c("severe","moderate","mild","convalescence"))) %>%
ggplot(aes(x=x,y=y,color=group)) +
ggforce::geom_mark_ellipse(aes(color = as.factor(group), fill=group),alpha=.1,show.legend = F, expand = unit(0.5,"mm")) +
geom_point(size=0.5) +
scale_color_manual(values=patient_kclust3_lab_conv) +
scale_fill_manual(values=patient_kclust3_lab_conv) +
theme_minimal() +
labs(title = "sparse PLS-DA",
color= NULL,
x=paste0("X-variate 1: ",round(splsda.data$prop_expl_var$X[[1]]*100,1),"% expl. var"),
y=paste0("X-variate 2: ",round(splsda.data$prop_expl_var$X[[2]]*100,1),"% expl. var")) +
theme(legend.position = "right")
)
plsda_loadings.df <-
data.frame(splsda.data$loadings$X) %>%
rownames_to_column("Assay") %>%
pivot_longer(names_to = "comp",values_to = "values",cols = -Assay) %>%
filter(values != 0,
comp %in% c("comp1","comp2")) %>%
group_by(comp) %>%
mutate(values = scales::rescale(values, to=c(-1,1))) %>%
ungroup()
(plsda_loadings.volcano.alt <- plsda_loadings.df %>%
ggplot(aes(x=fct_reorder(Assay,values,.desc = T), y=values, color=comp,fill=comp)) +
geom_point(size=0.5,alpha=1) +
geom_col(width = 0.01) +
coord_flip() +
theme_minimal() +
scale_color_manual(values=c(comp1 = "#1f78b4",comp2 = "#b2df8a")) +
scale_fill_manual(values=c(comp1 = "#1f78b4",comp2 = "#b2df8a")) +
labs(x="",
fill=NULL,
color=NULL,
y="scaled loading values",
title="sPLSDA loadings") +
theme(legend.title = element_text(size=6),
axis.text.x = element_text(size=6),
legend.position = "right",
legend.justification="right",
legend.box.spacing = unit(0, "pt")))
malaria.severity.siganture <- data.frame(splsda.data$loadings$X) %>%
rownames_to_column("Assay") %>%
pivot_longer(names_to = "comp",values_to = "values",cols = -Assay) %>%
filter(values != 0,
comp %in% c("comp1","comp2")
)
my_comparisons_severe_conv <- list(c("severe", "moderate"), c("moderate", "mild"), c("severe", "mild"),c("mild","convalescence"))
splsda.c1.top9 <- plsda_loadings.df %>%
#filter(comp=="comp1") %>%
slice_max(n=9, order_by = abs(values)) %>%
#arrange(comp) %>%
#slice_min(n=9, order_by = values) %>%
pull(Assay)# %>%
(splsda.c1.top9 <- df_acute_patclust_incl_conv %>%
dplyr::filter(Assay %in% c(splsda.c1.top9)) %>%
mutate(Assay = factor(Assay, levels = c(splsda.c1.top9))) %>%
ggplot(aes(x=severity_lab, y=NPX, color=severity_lab, fill=severity_lab)) +
geom_violin(trim = F,alpha=.9) +
geom_jitter(size=0.25,show.legend = F, width = 0.05, alpha=1, color="grey20") +
geom_boxplot(alpha=.7,width=0.25,outlier.shape = NA,color="black", fatten = 2,lwd=.25,show.legend = F) +
stat_compare_means(method = "wilcox.test",
label.sep = "\n",
hide.ns = T,
label = "p.signif" ,
vjust = .5,
size=2,
lwd = .2,
comparisons =my_comparisons_severe_conv,
show.legend = F) +
facet_wrap(~Assay,ncol = 3,scales = "free_y") +
theme_minimal() +
theme(legend.position="bottom",
axis.text.x = element_blank()) +
labs(x="",
color=NULL,
fill=NULL) +
scale_color_manual(values= patient_kclust3_lab_conv) +
scale_fill_manual(values= patient_kclust3_lab_conv))
plsda.selection <- malaria.severity.siganture %>% pull(Assay)
data.frame(Assay = plsda.selection) %>%
left_join(universe.proteins,by="Assay") %>%
transmute(Assay, UniProt)# %>%
## Assay UniProt
## 1 HMOX1 P09601
## 2 IL2RA P01589
## 3 VCAM1 P19320
## 4 ICAM1 P05362
## 5 IFNGR1 P15260
## 6 B4GALT1 P15291
## 7 LAYN Q6UX15
## 8 TNFRSF10B O14763
## 9 CALCA P01258
## 10 SPINK1 P00995
## 11 TNFRSF1B P20333
#write_tsv(paste0(result.dir,"MIP_Severity_Protein_signature_uniprotid.tsv"))
## Explore 1536 data set - MGH Covid-19 study, Filbin et al. 2021
#### Make data ready
# identify proteins with NPX below LOD in more than 70% of samples
assays2rm <- covid_NPXdata %>%
mutate(belowLOD = LOD>NPX) %>%
group_by(Assay) %>%
count(belowLOD,sort=TRUE) %>%
filter(belowLOD==T,
n > length(unique(covid_NPXdata$SampleID))*0.7) %>%
pull(Assay)
# remove proteins identified above from data
covid_NPXdata_rm <- covid_NPXdata %>%
filter(!Assay%in%assays2rm, Timepoint=="D0") %>%
dplyr::select(SampleID, subject_id, Assay, NPX, Panel)
# identify proteins with different values in different panels
assays2rm <- covid_NPXdata_rm %>%
group_by(subject_id, Assay) %>%
summarise(n = n(), .groups = "drop") %>%
filter(n > 1L) %>%
pull(Assay) %>%
unique()
# one example of the proteins identified above
covid_NPXdata_rm %>% filter(subject_id==1,Assay=="CXCL8")
## # A tibble: 4 × 5
## SampleID subject_id Assay NPX Panel
## <chr> <dbl> <chr> <dbl> <chr>
## 1 1_D0 1 CXCL8 1.46 CARDIOMETABOLIC
## 2 1_D0 1 CXCL8 4.40 INFLAMMATION
## 3 1_D0 1 CXCL8 4.59 NEUROLOGY
## 4 1_D0 1 CXCL8 3.52 ONCOLOGY
# convert data to wide format
covid_NPXdata_wide <- covid_NPXdata_rm %>%
## values_fn calculates median values for duplicated features (duplicated because part of every olink panel)
pivot_wider(names_from = Assay, values_from = NPX,id_cols = subject_id, values_fn = median)
# make data ready for GSVA
covid_NPXdata_mat <- covid_NPXdata_wide %>%
column_to_rownames("subject_id") %>%
as.matrix() %>%
t()
#### Run single sample gene set enrichment analysis
library(GSVA)
# run ssgsea on signature from sPLSDA
GSE_results <- gsva(expr = covid_NPXdata_mat,
gset.idx.list = list(sig=plsda.selection),verbose=F,
method="zscore")
#data_plot_sPLSDA <- data.frame(group=as.factor(covid_clinicalData$WHO.0),score=as.vector(GSE_results))
GSEA_result_df <- data.frame(subject_id = colnames(covid_NPXdata_mat),
ssES = as.vector(GSE_results)) %>%
right_join(mgh.covid.meta %>% transmute(subject_id= as.character(subject_id),
who_0 = as.factor(who_0)), by="subject_id")
# show results
(MGH_covid_ssES <- GSEA_result_df %>%
ggplot(aes(x=who_0, y=ssES, fill=who_0)) +
geom_jitter(width=0.15,size=.3,alpha=.2) +
# geom_violin(width=1.5, trim = F, alpha=0.7,show.legend = F,lwd=.25) +
geom_boxplot(width=.3,alpha=.6, fatten = 2,lwd=.25,outlier.colour = NA) +
#geom_boxplot(alpha=0.6, width=.2, show.legend = F)+
theme_minimal()+
theme(legend.position = "none") +
labs(y="ssES score (zscore)",x="Severity group",fill=""))
#### Make data ready
MIP.long <- data.long %>%
left_join(sampleTable_simple) %>%
filter(!grepl("D10",sample_id)) %>%
left_join(patient_clust) %>%
mutate(sample_type = case_when(Time=="Acute" ~ paste0(severity_lab," malaria"),
.default = paste0(#Time,
"Malaria",
" convalescence"))) %>%
transmute(sample_id, sample_type, Assay, NPX)
data_tf_mip_wide <- TF.long %>%
bind_rows(MIP.long) %>%
pivot_wider(names_from = Assay, values_from = NPX, id_cols = c(sample_id,sample_type), values_fn = median)
all.mat <- data_tf_mip_wide %>%
dplyr::select(-c(sample_type)) %>%
column_to_rownames("sample_id") %>%
as.matrix() %>%
t()
#### Run single sample gene set enrichment analysis
library(GSVA)
# run ssgsea on signature from sPLSDA
GSE_results <- gsva(expr = all.mat,
gset.idx.list = list(sig=plsda.selection),#list(sig=severity_signature_proteins),
verbose=F,
method="zscore")
GSEA_result_df <- data.frame(sample_id = colnames(all.mat),
ssES = as.vector(GSE_results)) %>%
left_join(data_tf_mip_wide %>% dplyr::select(sample_id,sample_type), by="sample_id") %>%
left_join(
TF_SOFA %>% transmute(sample_id = paste0(study_id,"|Acute"), SOFA_total) %>%
bind_rows(
subjectTable %>% transmute(sample_id = paste0(study_id,"|Acute"), SOFA_total)
)) %>%
mutate(sample_type = case_when(sample_type == "Influensa A" ~ "Influenza A",
sample_type == "Influensa B" ~ "Influenza B",
.default = sample_type))
# show results
(TF_ssES_plot <- GSEA_result_df %>%
ggplot(aes(x=fct_reorder(sample_type,ssES, median,.desc = T), y=ssES, fill=sample_type)) +
geom_jitter(width=0.15,size=.3,alpha=.2) +
geom_boxplot(width=.3,alpha=.6, fatten = 2,lwd=.25,outlier.colour = NA) +
theme_minimal() +
scale_fill_manual(values=c("severe malaria"=patient_kclust3_lab[[3]],
"moderate malaria"=patient_kclust3_lab[[2]],
"mild malaria"=patient_kclust3_lab[[1]],
"Malaria convalescence"=patient_kclust3_lab_conv[[4]]),
na.value = "#8dd3c7") +
geom_hline(yintercept = 0,linetype="dotted") +
labs(title="Evaluation of malaria disease severity signature",
x=NULL,
y="Gene Set Variation Analysis\nsingle sample enrichment score\n(zscore)",
fill="") +
theme(legend.position = "none") +
scale_x_discrete(labels = label_wrap(8)) )
malaria.severity.siganture %>%
arrange(values) %>%
dplyr::rename(importance = values) %>%
head()
## # A tibble: 6 × 3
## Assay comp importance
## <chr> <chr> <dbl>
## 1 LAYN comp2 -0.816
## 2 TNFRSF1B comp1 -0.704
## 3 SPINK1 comp2 -0.487
## 4 IL2RA comp1 -0.472
## 5 B4GALT1 comp1 -0.434
## 6 TNFRSF10B comp2 -0.283
#write_tsv(paste0(result.dir,"Supplementary_TableS5_SeveritySignature.tsv"))
TF cohort - sampleTable
##gtsummary
library(gtsummary)
TF_sampleTable %>%
dplyr::select(-sample_type) %>%
inner_join(GSEA_result_df, by="sample_id") %>%
transmute(sample_type,
SOFA_total = as.numeric(SOFA_total),
age = as.numeric(age),
gender) %>%
filter(!grepl("Malaria|malaria", sample_type)) %>%
tbl_summary(include = c(sample_type, age, gender,
SOFA_total),
by = sample_type,
statistic = list(all_continuous() ~ "{median} ({min}-{max})",
all_categorical() ~ "{n} / {N} ({p}%)"
),
#digits = all_continuous() ~ 2
digits = c(age ~ 0)
)
Characteristic | Campylobacter, N = 171 | Dengue, N = 191 | E.coli -pyelonefriter, N = 91 | Influenza A, N = 171 | Influenza B, N = 181 | Mycoplasma pneumonier, N = 51 | Salmonella (only feces), N = 121 | Shigella, N = 31 |
---|---|---|---|---|---|---|---|---|
age | 32 (26-61) | 34 (22-66) | 56 (24-87) | 45 (23-88) | 38 (26-68) | 40 (18-72) | 38 (22-61) | 40 (29-50) |
gender | 11 / 17 (65%) | 11 / 19 (58%) | 2 / 9 (22%) | 5 / 17 (29%) | 7 / 18 (39%) | 3 / 5 (60%) | 8 / 12 (67%) | 1 / 3 (33%) |
SOFA_total | ||||||||
    0 | 15 / 17 (88%) | 12 / 19 (63%) | 4 / 5 (80%) | 14 / 17 (82%) | 15 / 18 (83%) | 3 / 4 (75%) | 11 / 12 (92%) | 2 / 3 (67%) |
    1 | 2 / 17 (12%) | 3 / 19 (16%) | 0 / 5 (0%) | 3 / 17 (18%) | 1 / 18 (5.6%) | 1 / 4 (25%) | 1 / 12 (8.3%) | 0 / 3 (0%) |
    2 | 0 / 17 (0%) | 4 / 19 (21%) | 0 / 5 (0%) | 0 / 17 (0%) | 2 / 18 (11%) | 0 / 4 (0%) | 0 / 12 (0%) | 1 / 3 (33%) |
    3 | 0 / 17 (0%) | 0 / 19 (0%) | 1 / 5 (20%) | 0 / 17 (0%) | 0 / 18 (0%) | 0 / 4 (0%) | 0 / 12 (0%) | 0 / 3 (0%) |
    Unknown | 0 | 0 | 4 | 0 | 0 | 1 | 0 | 0 |
1 Median (Minimum-Maximum); n / N (%) |
A protein-centric view on integrated analysis to ascertain immune cell communication associated with disease severity
similar_modules <- c(restricted_module_turquoise,
module_df %>% filter(colors=="brown") %>% pull(Assay),
module_df %>% filter(colors=="blue") %>% pull(Assay))
library(eulerr)
euler_plot <- euler(
list(
"Differential\nabundant\nproteins\nin\nacute malaria"=selected.assays.wcna,
"Severity associated\nproteins in plasma" = restricted_module_turquoise)
)
plot(euler_plot,
fills = c("white",
"turquoise"),
quantities = TRUE,
lty = 1,#1:3,
fontsize=6,
labels = list(fontsize=5),
shape = "ellipse")
secretome_location_dap_severity <- dap.res %>%
filter(Assay %in% restricted_module_turquoise) %>%
inner_join(hpa_24.0, by=c("Assay"="gene","UniProt"="uniprot")) %>%
mutate(secretome_location_tissue_spec = case_when(secretome_location=="Not secreted"~ paste0(secretome_location," - ",rna_tissue_specificity),
.default = secretome_location)) %>%
group_by(secretome_location_tissue_spec) %>%
mutate(n_secretome_location_tissue_spec = n()) #count(sort = TRUE)
## plot everything
(hpa.protein.origin.overview_severity <- secretome_location_dap_severity %>%
ungroup() %>%
transmute(secretome_location_tissue_spec = factor(secretome_location_tissue_spec,
levels = rev(c("Secreted to blood",
"Intracellular and membrane",
"Secreted in other tissues",
"Secreted to extracellular matrix",
"Secreted to digestive system",
"Secreted in brain",
"Secreted - unknown location",
"Secreted in female reproductive system",
"Secreted in male reproductive system",
"Not secreted - Tissue enriched",
"Not secreted - Tissue enhanced",
"Not secreted - Group enriched",
"Not secreted - Low tissue specificity"))),
n_secretome_location_tissue_spec) %>%
distinct() %>%
ggplot(aes(x = secretome_location_tissue_spec, y = n_secretome_location_tissue_spec, fill = secretome_location_tissue_spec)) +
geom_col(width = 0.5) +
geom_text(aes(label=n_secretome_location_tissue_spec),size=2, nudge_y = -.2) +
coord_flip() +
scale_y_continuous(trans="pseudo_log",name = "Number of proteins\nassociated with severity",
sec.axis = sec_axis(~.,labels = NULL,breaks = NULL,
#name = "Number of DAPs"
),
#expand=c(0,.15)
expand = c(0,0)
) +
theme_bw() +
theme(axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
legend.text=element_text(size=6),
legend.title=element_text(size=6),
plot.title = element_text(size=6),
legend.position = "none")+
scale_fill_manual(values=secretome_location_tissue_spec_cols,
limits = secretome_location_dap.order) +
labs(fill="Protein\norigin\nby HPA",
x=NULL))
## plot everything
(hpa.protein.origin.overview_severity <- secretome_location_dap_severity %>%
ungroup() %>%
transmute(secretome_location_tissue_spec = factor(secretome_location_tissue_spec,
levels = c("Secreted to blood",
"Intracellular and membrane",
"Secreted in other tissues",
"Secreted to extracellular matrix",
"Secreted to digestive system",
"Secreted in brain",
"Secreted - unknown location",
"Secreted in female reproductive system",
"Secreted in male reproductive system",
"Not secreted - Tissue enriched",
"Not secreted - Tissue enhanced",
"Not secreted - Group enriched",
"Not secreted - Low tissue specificity")),
n_secretome_location_tissue_spec) %>%
distinct() %>%
ggplot(aes(x = secretome_location_tissue_spec, y = n_secretome_location_tissue_spec, fill = secretome_location_tissue_spec)) +
geom_col(width = 0.5) +
geom_text(aes(label=n_secretome_location_tissue_spec),size=2, nudge_y =1.5) + #0.1
#coord_flip() +
scale_y_continuous(#trans="pseudo_log",
name = "Number of proteins",
sec.axis = sec_axis(~.,labels = NULL,breaks = NULL,
#name = "Number of DAPs"
),
#expand=c(0,.15)
#expand = c(0,0.1),
limits = c(0,51)
) +
theme_bw() +
theme(axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6,angle=90,hjust = 1,vjust = 0.5),
#axis.text.x = element_text(size = 6,angle=45,hjust = 1),
axis.title.y = element_text(size=6),
legend.text=element_text(size=6),
legend.title=element_text(size=6),
plot.title = element_text(size=6),
legend.position = "none")+
scale_fill_manual(values=secretome_location_tissue_spec_cols,
limits = secretome_location_dap.order) +
labs(fill="Protein\norigin\nby HPA",
x=NULL))
proteins2label <- df_acute_patclust_incl_conv %>%
group_by(UniProt,Assay, severity_lab) %>%
summarise(NPXmean = mean(NPX),
NPXmedian = median(NPX),
NPXsd = sd(NPX),
NPXn = n(),
NPXse = NPXsd / sqrt(NPXn)
) %>%
#ungroup() %>%
mutate(NPXci95 = NPXse * qt(.975, NPXn - 1)) %>%
left_join(
secretome_location_dap_severity %>%
filter(Assay %in% restricted_module_turquoise) %>%
transmute(Assay,secretome_location_tissue_spec), by="Assay"
) %>%
filter(severity_lab=="severe",
Assay %in% restricted_module_turquoise,
!is.na(secretome_location_tissue_spec)) %>%
group_by(secretome_location_tissue_spec) %>%
#transmute(Assay,NPX, severity_lab, secretome_location_tissue_spec) %>%
distinct() %>%
slice_max(order_by = NPXmean,n = 3) %>% pull(Assay)
tmp.df <- df_acute_patclust_incl_conv %>%
group_by(UniProt,Assay, severity_lab) %>%
summarise(NPXmean = mean(NPX),
NPXmedian = median(NPX),
NPXsd = sd(NPX),
NPXn = n(),
NPXse = NPXsd / sqrt(NPXn)
) %>%
#ungroup() %>%
mutate(NPXci95 = NPXse * qt(.975, NPXn - 1)) %>%
filter(Assay%in%restricted_module_turquoise) %>%
left_join(
secretome_location_dap_severity%>%
transmute(Assay, secretome_location_tissue_spec), by="Assay")
(turqoise_allProteins_lab <- tmp.df %>%
ggplot(aes(x=fct_reorder(Assay,NPXmean,.desc = F), y=NPXmean)) +
geom_point(shape=16,size=.5,aes(color=severity_lab)) +
geom_errorbar(aes(x = Assay,#reorder(str_wrap(Assay, 5), estimate),
ymin=NPXmean-NPXci95,
ymax=NPXmean+NPXci95,
color=severity_lab),
linewidth=.2, # Thinner lines
width=.2,
alpha=.5) +
geom_hline(yintercept = 0,lty=5, lwd=.2) +
ggrepel::geom_text_repel(data = . %>% filter(Assay %in% proteins2label,#c("LGALS9","HAVCR2","IL4","IL4R","CD70","PDCD1","CD274"),
severity_lab == "severe"),
aes(label = Assay, colour=secretome_location_tissue_spec, nudge_y=NPXmean),
size = .9,
force_pull = 3, # do not pull toward data points
force = .15, # Strength of the repulsion force.
nudge_x = 0,
# Do not repel from top or bottom edges.
ylim = c(1, Inf),
direction = "y",
angle = 90,
hjust = 0,
segment.size = 1/20, ## segment width
segment.linewidth = 1/12,#0.01,
arrow = arrow(length = unit(0.04, 'npc')), # Draw an arrow from the label to the data point.
max.overlaps = 50,
max.iter = 3e3, # Maximum iterations of the naive repulsion algorithm O(n^2).
color = "grey10"
) +
scale_y_continuous(limits=c(-1.5,8.5),expand = c(0,0)) +
scale_color_manual(values = patient_kclust3_lab_conv) +
coord_cartesian(clip = "off") +
#coord_flip() +
labs(x="Severity associated proteins in plasma",
y="NPX",
color=NULL,
caption = "meanNPX +- ci95") +
theme(legend.position = "top",
axis.text.x = element_text(colour = "black",angle=90,hjust=1,vjust=.5,size = 2),
axis.text.y = element_text(size = 6),
panel.grid.minor = element_line(size = 0.1),
panel.grid.major = element_line(size = .1),
#axis.text.x = element_blank()
)
)
require(patchwork)
(p_annotation <- #secretome_location_dap_severity%>%
#transmute(Assay, secretome_location_tissue_spec) %>%
# left_join(tmp.df,by="Assay") %>%
tmp.df %>%
mutate(dummy.y = "HPA") %>%
ggplot(aes(x = fct_reorder(Assay,NPXmean,.desc = F), y = dummy.y, fill = secretome_location_tissue_spec)) +
geom_tile(linejoin = "round") +
scale_fill_manual(values = secretome_location_tissue_spec_cols) +
theme_void() +
theme(legend.position = "none")
#scale_y_discrete(expand=c(0,-0.1))
# theme(aspect.ratio = 1/100)
)
test_plot <- p_annotation / turqoise_allProteins_lab + plot_layout(height = c(.5, 4))
test_plot
## create a gene name - uniprot dictionary
name_up_dict <- hpa_24.0 %>% transmute(gene, uniprot)
ligand.q <- dap.res %>% filter(p.adj <=0.01, logFC > .1) %>%
left_join(cpdb.protein_input,
by=c("UniProt"="uniprot")) #%>%
#pull(UniProt)
length(ligand.q)
## [1] 32
interaction_dict <- cpdb.interaction_input %>%
filter(partner_a %in% ligand.q$UniProt,
directionality == "Ligand-Receptor"
#directionality %in% c("Ligand-Receptor","Receptor-Receptor","Ligand-Ligand")
) %>%
mutate(protein_name_b_strip = gsub("_HUMAN","",protein_name_b),
protein_name_a = gsub("_HUMAN","",protein_name_a)) %>%
mutate(protein_name_b_complex = case_when(is.na(protein_name_b) ~ str_remove(interactors,paste0(protein_name_a,"-")),
.default = protein_name_b)) %>%
separate_longer_delim(protein_name_b_complex, delim = "+") %>%
left_join(hpa_24.0 %>% transmute(protein_name_b_complex = gene,
uniprot_b_complex = uniprot), by=c("protein_name_b_complex")) %>%
mutate(protein_name_b = case_when(is.na(protein_name_b) ~ protein_name_b_complex,
.default = protein_name_b),
partner_b_new = case_when(is.na(uniprot_b_complex) ~ partner_b,
.default = uniprot_b_complex)) %>%
transmute(partner_a, partner_b, partner_b_new) %>%
mutate(uniprot_a = partner_a,
uniprot_b = partner_b_new) %>%
transmute(source = uniprot_a,
recipient = uniprot_b) %>%
left_join(name_up_dict %>% dplyr::rename(source_gene = gene), by=c("source" = "uniprot")) %>%
left_join(name_up_dict %>% dplyr::rename(recipient_gene = gene), by=c("recipient" = "uniprot"))
interaction_dict
## # A tibble: 265 × 4
## source recipient source_gene recipient_gene
## <chr> <chr> <chr> <chr>
## 1 P19022 P06734 CDH2 FCER2
## 2 P05362 P20701 ICAM1 ITGAL
## 3 P05362 P20701 ICAM1 ITGAL
## 4 P05362 P05107 ICAM1 ITGB2
## 5 P05362 P11215 ICAM1 ITGAM
## 6 P05362 P05107 ICAM1 ITGB2
## 7 P05362 P20702 ICAM1 ITGAX
## 8 P05362 P05107 ICAM1 ITGB2
## 9 P13598 Q9NNX6 ICAM2 CD209
## 10 P13598 P20701 ICAM2 ITGAL
## # ℹ 255 more rows
celltype_l2_freq <- tibble(pbmc_acute@meta.data) %>%
group_by(CellType_L2) %>%
summarise(n = n()) %>%
mutate(freq = n / sum(n),
Percentage = freq*100)
celltype_l2_of_l1_freq <- tibble(pbmc_acute@meta.data) %>%
group_by(CellType_L1,CellType_L2) %>%
summarise(n = n()) %>%
group_by(CellType_L1) %>%
mutate(freq = n / sum(n),
Percentage = freq*100)
circosplot function
my_nice_circosplot <- function(assays2plot_circos, scale_range, expression_threshold, pdf_file_name){
pbmc_acute.avg.long_tourquoise <- pbmc_acute.avg.long %>%
filter(celltype != "undefined",
gene %in% assays2plot_circos,
) %>%
mutate(gene_ct = paste0(gene,"_",celltype)) %>%
group_by(gene) %>%
mutate(avgExp = scales::rescale(avgExp, to=scale_range)) %>% #c(0,1)
ungroup()
mat <- pbmc_acute.avg.long_tourquoise %>%
filter(avgExp >expression_threshold) %>% #.5
left_join(tibble(pbmc@meta.data) %>% transmute(celltype_1 = CellType_L1,
celltype = CellType_L2) %>% distinct(), by="celltype") %>%
mutate(celltype_1 = factor(celltype_1, levels= c("DC","Monocytes","NK","gdT","B","CD4+ T","CD8+ T")),
celltype = factor(celltype, levels = c("mDC", "pDC",
"CD14 monocytes", "CD16 monocytes",
"NK CD56dim CD16+", "NK CD56dim","NK CD56bright","NK prolif.",
"Vd2+ gdT", "Vd2- gdT",
"B naive", "B memory", "Plasma cells",
"CD4 naive", "CD4 Treg CD80+", "CD4 Treg CD80-", "CD4 Tfh",
"CD4 effect. activated", "CD4 effect. memory",
"CD4 trans. memory","CD4 central memory",
"CD8 naive", "CD8 trans. memory", "CD8 Tfh",
"NKT", "CD8 effect. memory"))) %>%
add_rownames()
tmp <- mat %>% dplyr::rename(index = rowname)
df_link <- interaction_dict %>%
transmute(source_gene, recipient_gene) %>%
left_join(tmp %>%
filter(gene %in% interaction_dict$source_gene) %>%
transmute(from_index = index,
gene),
by=c("source_gene"="gene")) %>%
filter(!is.na(from_index)) %>%
left_join(tmp %>%
transmute(to_index = index,
gene),
by=c("recipient_gene"="gene")) %>%
filter(!is.na(to_index)) %>%
distinct() %>%
right_join(mat %>% transmute(gene, celltype) %>%
left_join(data.frame(celltype = names(L2_colors),
source_celltype_colors = L2_colors)),
by=c("source_gene"="gene")) %>%
transmute(from_index = as.integer(from_index),
to_index = as.integer(to_index),
source_celltype = celltype,
source_gene,
recipient_gene) %>%
na.omit() %>%
distinct()
## goal
## from_index; to_index data frame
mat_gex <- mat %>% arrange(celltype) %>%
column_to_rownames("gene_ct") %>%
dplyr::select(avgExp)
mat_npx <- mat %>%
transmute(gene_ct, gene) %>%
left_join(df_acute_patclust_incl_conv %>%
transmute(Assay, NPX,severity_lab) %>%
#filter(Assay %in% restricted_module_turquoise) %>%
pivot_wider(names_from = severity_lab, values_from = NPX,values_fn = median)
, by=c("gene"="Assay")) %>%
dplyr::select(-gene) %>%
distinct() %>%
column_to_rownames("gene_ct") %>%
relocate(severe, moderate, mild, convalescence)
## legends
col_npx <- colorRamp2(c(min(mat_npx,na.rm = T),
0,
max(mat_npx,na.rm = T)/2,
max(mat_npx,na.rm = T)),
c("#edf8e9","#bae4b3", "#74c476","#238b45"))
#mat_npx <- mat_npx %>% mutate(across(where(is.numeric), ~ scales::rescale(., to=c(4,0))))
cell_freq_color <- colorRamp2(c(0,
min(celltype_l2_freq$Percentage,na.rm = T),
mean(celltype_l2_freq$Percentage,na.rm=T),
max(celltype_l2_freq$Percentage,na.rm = T)),
c("#f2f0f7","#cbc9e2","#9e9ac8","#6a51a3"))
lgd_npx = Legend(title = "NPX", col_fun = col_npx,
title_gp = gpar(fontsize=6,fontface="bold"),
labels_gp = gpar(fontsize=6))
lgd_gex = Legend(title = "GEX", col_fun = scaled_01_col,
title_gp = gpar(fontsize=6,fontface="bold"),
labels_gp = gpar(fontsize=6))
lgd_celltype_freq = Legend(title = "Cell frequency\nof CD45+",
col_fun = cell_freq_color,
title_gp = gpar(fontsize=6,fontface="bold"),
labels_gp = gpar(fontsize=6))
lgd_severity = Legend(title = "Severity group",
at = c("severe","moderate","mild","convalescence"),
legend_gp = gpar(fill = c("#ca0020","#f4a582","#92c5de","grey50")),
title_gp = gpar(fontsize=6,fontface="bold"),
labels_gp = gpar(fontsize=6))
lgd_celltype = Legend(title = "Celltypes",
at = c("DC","Monocytes","NK","gdT","B","CD4+ T","CD8+ T"),
legend_gp = gpar(fill = c("#ca5369","#688bcc","#8761cc", "#ae953e",
"#c361aa","#68a748","#cc693d")),
ncol = 1,
#nrow = 1,
title_gp = gpar(fontsize=6,fontface="bold"),
labels_gp = gpar(fontsize=6))
lgd_celltype_2 = Legend(title = "Celltypes",
at = c("mDC","pDC",
"CD14 monocytes","CD16 monocytes",
"NK CD56dim CD16+","NK CD56dim","NKCD56bright","NK prolif.","NKT",
"Vd2+ gdT","Vd2- gdT",
"B naive",
"B memory",
"Plasma cells",
"CD4 naive", "CD4 Treg CD80+", "CD4 Treg CD80-", "CD4 Tfh",
"CD4 effect. activated","CD4 effect. memory","CD4 trans.memory","CD central memory",
"CD8 naive", "CD8 trans. memory","CD8Tfh","CD8 effect. memory"),
#,(L2_colors),
legend_gp = gpar(fill = L2_colors),
ncol=1,
#nrow = 4,
title_gp = gpar(fontsize=6,fontface="bold"),
labels_gp = gpar(fontsize=6))
circle_size = unit(1, "snpc") # snpc unit gives you a square region
## == circos.heatmap.get.x start ====
## A function to extract row indicies, useful for labelling
## source: https://rdrr.io/github/jokergoo/circlize/src/R/circos.heatmap.R
# == title
# Get the x-position for heatmap rows
#
# == param
# -row_ind A vector of row indicies.
#
# == value
# A three-column data frame of the sector, the x-positions on the corresponding sectors, and the original row indicies.
circos.heatmap.get.x = function(row_ind) {
env = circos.par("__tempenv__")
split = env$circos.heatmap.split
row_ind_lt = split(row_ind, split[row_ind])
row_ind_lt = row_ind_lt[sapply(row_ind_lt, length) > 0]
x = NULL
for(i in row_ind_lt) {
subset = get.cell.meta.data("subset", sector.index = split[i[1]])
order = get.cell.meta.data("row_order", sector.index = split[i[1]])
x = c(x, which((1:length(split))[subset][order] %in% i))
}
df = data.frame(sector = rep(names(row_ind_lt), times = sapply(row_ind_lt, length)),
x = x - 0.5, row_ind = unlist(row_ind_lt))
rownames(df) = NULL
df
}
## == circos.heatmap.get.x end ====
total_sections <- length(levels(mat$celltype))
## the function to make the plot
circlize_plot = function() {
circos.clear()
circos.par(gap.after = c(rep(2,total_sections-1),10),
points.overflow.warning = T)
#circos.par(start.degree = 90, gap.degree = 10,gap.after = c(3))
## dummy track, invisible, needed for split
circos.heatmap(mat_gex,
cluster = F,
split = droplevels(mat$celltype),
col = colorRamp2(c(-2, 0, 2), c("white", "white", "white")),
track.height = 0.21,#0.000000001,
)
## celltype annotation track
circos.heatmap(mat %>% column_to_rownames("gene_ct") %>% dplyr::select(celltype),
col = L2_colors,
track.height = 0.08,
rownames.side = "none",
)
## celltype frequency track
circos.heatmap(mat %>% column_to_rownames("gene_ct") %>% transmute(CellType_L2 = celltype) %>% left_join(celltype_l2_freq, by="CellType_L2") %>% pull(Percentage), col = cell_freq_color, track.height = 0.01)
## celltype annotation tack naming
circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
xlim = get.cell.meta.data("xlim")
ylim = get.cell.meta.data("ylim")
sector.name = get.cell.meta.data("sector.index")
circos.text(mean(xlim),
ylim[1] + .1,
sector.name,
facing = "clockwise",
niceFacing = TRUE, cex=.6,
adj = c(0, 0.5), col = "grey40")
}, bg.border = NA)
## celltype gene expression track
circos.heatmap(mat_gex,
cluster = F,
col = scaled_01_col,
track.height = 0.04,
# rownames.side = "outside",
bg.border = "grey80",
bg.lwd = .1,
bg.lty = .1,
show.sector.labels = F
)
## plasma NPX trac
circos.heatmap(mat_npx, col = col_npx, track.height = 0.09)
## add annotation to row of npx data
circos.track(track.index = get.current.track.index(), panel.fun = function(x, y) {
if(CELL_META$sector.numeric.index == total_sections) { # the last sector #26
## conval
circos.rect(CELL_META$cell.xlim[2] + convert_x(1, "mm"), 0,
CELL_META$cell.xlim[2] + convert_x(4, "mm"), 1, #10
col = "grey50", border = NA)
## mild
circos.rect(CELL_META$cell.xlim[2] + convert_x(1, "mm"), 1,
CELL_META$cell.xlim[2] + convert_x(4, "mm"), 2, #5
col = "#92c5de", border = NA)
## moderate
circos.rect(CELL_META$cell.xlim[2] + convert_x(1, "mm"), 2,
CELL_META$cell.xlim[2] + convert_x(4, "mm"), 3, #10
col = "#f4a582", border = NA)
## severe
circos.rect(CELL_META$cell.xlim[2] + convert_x(1, "mm"), 3,
CELL_META$cell.xlim[2] + convert_x(4, "mm"), 4, #10
col = "#ca0020", border = NA)
}
}, bg.border = NA)
## Annotate source genes
row_ind <- mat %>% filter(gene%in%df_link$source_gene) %>% mutate(rowname = as.integer(rowname)) %>% pull(rowname)
pos_so = circos.heatmap.get.x(row_ind)
pos_so <- pos_so %>% right_join(mat %>% filter(gene%in%df_link$source_gene) %>% mutate(row_ind = as.integer(rowname)), by="row_ind")
## Annotate recipient genes
row_ind <- mat %>% filter(gene%in%df_link$recipient_gene) %>% mutate(rowname = as.integer(rowname)) %>% pull(rowname)
pos_rec = circos.heatmap.get.x(row_ind)
pos_rec <- pos_rec %>% right_join(mat %>% filter(gene%in%df_link$recipient_gene) %>% mutate(row_ind = as.integer(rowname)), by="row_ind")
pos <- bind_rows(pos_so, pos_rec)
## lable all other genes
## Annotate source genes
row_ind_allothers <- mat %>% filter(!gene %in% c(df_link$source_gene,df_link$recipient_gene)) %>% mutate(rowname = as.integer(rowname)) %>% pull(rowname)
pos_allothers = circos.heatmap.get.x(row_ind_allothers) %>%
left_join(mat %>% mutate(row_ind = as.integer(rowname)), by="row_ind")
## join all lable info
pos_all <- bind_rows(pos %>% mutate(col = "black",
size = .4),
pos_allothers %>% mutate(col = "grey",
size=.2))
circos.labels(pos_all$sector,
pos_all$x,
connection_height =.01,
cex =pos_all$size,
side="inside",
col = pos_all$col,
labels = pos_all$gene)
## add connections
for(i in seq_len(nrow(df_link))) {
circos.heatmap.link(df_link$from_index[i],
df_link$to_index[i],
col = rand_color(1),
lwd = 1.5,
directional = 1,
arr.width = .125,
arr.length = .2,
arr.lwd = .1,
arr.col = "black")
}
circos.clear()
}
library(gridBase)
pdf(paste0(result.tmp.dir,pdf_file_name,".pdf"))#,paper = "a4r")
plot.new()
circle_size = unit(1, "snpc") # snpc unit gives you a square region
pushViewport(viewport(x = 0.5, y = 1,#0.5,
width = circle_size,
height = circle_size,
just = c("center", "top")))
par(omi = gridOMI(), new = TRUE)
circlize_plot()
upViewport()
draw(packLegend(lgd_gex, lgd_npx,
direction = "horizontal"),
y = unit(1, "npc") - circle_size*.1,
x = unit(1.04,"npc") - circle_size*0.1,
just = c("center","right"))
draw(packLegend(lgd_celltype_freq,
direction = "horizontal"),
#y = unit(1, "npc") - circle_size*.1,
x = unit(1.04,"npc") - circle_size*0.1,
just = c("center","right"))
draw(packLegend(lgd_severity,
direction = "horizontal"),
y = unit(1, "npc") - circle_size*.8,
x = unit(1.0,"npc") - circle_size*0.11,
just = c("bottom","left"))
dev.off()
return(circlize_plot())
}
my_nice_circosplot(assays2plot_circos = restricted_module_turquoise,
scale_range = c(0,1),
expression_threshold = 0.5,
pdf_file_name = "ImmuneCell_Protein_CircosPlot_severitymodule"
)
my_comparisons_severe_conv <- list(c("severe", "moderate"), c("moderate", "mild"), c("severe", "mild"),c("mild","convalescence"))
selection <- c("LGALS9","IL4","CD274","CD70")
(severity_ligand_npx <- df_acute_patclust_incl_conv %>%
dplyr::filter(Assay %in% c(selection)) %>%
mutate(Assay = factor(Assay, levels = c(selection))) %>%
ggplot(aes(x=severity_lab, y=NPX, color=severity_lab, fill=severity_lab)) +
geom_violin(trim = F,alpha=.9) +
geom_jitter(size=0.25,show.legend = F, width = 0.05, alpha=1, color="grey20") +
geom_boxplot(alpha=.7,width=0.25,outlier.shape = NA,color="black", fatten = 2,lwd=.25,show.legend = F) +
stat_compare_means(method = "wilcox.test",
label.sep = "\n",
hide.ns = T,
label = "p.signif" ,
vjust = .5,
size=2,
lwd = .2,
comparisons =my_comparisons_severe_conv,
show.legend = F) +
facet_wrap(~Assay,nrow = 2,scales = "free_y") +
theme_minimal() +
theme(legend.position="bottom",
axis.text.x = element_blank()) +
labs(x="",
color=NULL,
fill=NULL) +
scale_color_manual(values= patient_kclust3_lab_conv) +
scale_fill_manual(values= patient_kclust3_lab_conv))
#FACs_data %>% distinct(feature)
cellsubset.order <- FACs_data %>%
filter(feature%in%c("CD4+CD38+HLADR+ T cells","CD8+CD38+HLADR+ T cells"),
# grepl("Treg",feature)
) %>% distinct(feature) %>% pull()
test_correlation_input <- FACs_data %>%
filter(Time=="Acute",
feature%in%
c(
"CD4+CD38+HLADR+ T cells",
"CD8+CD38+HLADR+ T cells"
)
) %>%
transmute(sample_id = sampleID,
type_feature = paste0("FACS_",feature),
value) %>%
pivot_wider(values_from = value, names_from = type_feature) %>%
inner_join(
## Plasma protein data from this paper
data.long %>%
inner_join(sampleTable_simple) %>%
filter(Assay %in% restricted_module_turquoise) %>%
transmute(
sample_id,
type_assay = paste0("PEA_",Assay),
NPX) %>%
pivot_wider(values_from = NPX, names_from = type_assay),
by="sample_id")
test_correlation_res <- test_correlation_input %>%
column_to_rownames("sample_id") %>%
correlation(p_adjust = "fdr",method = "spearman",redundant = F) %>%
tibble()
cor.g <- test_correlation_res %>%
filter(Parameter1!=Parameter2) %>%
filter(
!grepl("PEA",Parameter1),
!grepl("FACS",Parameter2)
) %>%
arrange(-abs(rho)) %>%
filter(p<=0.05) %>%
transmute(from = Parameter1,# = str_remove(Parameter1, "FACS_"),
to = Parameter2,# = str_remove(Parameter2, "PEA_"),
rho,
p
) %>%
as_tbl_graph(directed = F)
node_table <- as_tibble(cor.g) %>%
separate(name, into=c("omics","feature"),sep = "_",remove = F)
(protein_cellnum_cornet <- cor.g %>%
inner_join(node_table,by="name") %>%
activate(nodes) %>% # Sets context to nodes -> subsequent operations are performed on nodes
mutate(deg = centrality_degree()) %>%
filter(!node_is_isolated()) %>% # Removes nodes that are isolated/do not have any follower edges
# create_layout(layout = "igraph", algorithm = "fr") %>%
create_layout(layout = "stress") %>% #fr
ggraph() +
geom_edge_link(aes(color = rho),
# alpha = 0.9
) +
scale_edge_color_continuous(low="thistle2",
high="darkred") +
geom_node_point(aes(color = omics,
size= ifelse(omics=="FACS",3,1)),
show.legend = F) +
geom_node_text(aes(label = feature,
alpha= ifelse(deg>1,1,.5)
),
color="black",
size=1.5,
repel = T,show.legend = F
) +
scale_alpha_continuous(range = c(0.5, 1)) +
theme_void() +
guides(color= guide_colorbar(barheight = 1, barwidth = .1)) +
theme(legend.position = "right",
plot.title = element_text(size=6),
legend.title = element_text( size=6),
legend.text=element_text(size=6))
)
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.7.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Berlin
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] gridBase_0.4-7 GSVA_1.48.3 UpSetR_1.4.0
## [4] see_0.8.0 pathview_1.40.0 ggtext_0.1.2
## [7] gtsummary_1.7.2 emmeans_1.8.7 lmerTest_3.1-3
## [10] lme4_1.1-34 eulerr_7.0.2 mixOmics_6.24.0
## [13] lattice_0.21-8 MASS_7.3-60 WGCNA_1.72-1
## [16] fastcluster_1.2.3 dynamicTreeCut_1.63-1 scales_1.2.1
## [19] ggraph_2.1.0 tidygraph_1.2.3 umap_0.2.10.0
## [22] clusterProfiler_4.8.2 SeuratObject_4.1.3 Seurat_4.3.0.1
## [25] ggalluvial_0.12.5 ggrepel_0.9.3 correlation_0.8.4
## [28] ggpubr_0.6.0 igraph_1.5.0.1 broom_1.0.5
## [31] circlize_0.4.15 ComplexHeatmap_2.16.0 RColorBrewer_1.1-3
## [34] viridis_0.6.4 viridisLite_0.4.2 gridExtra_2.3
## [37] patchwork_1.1.2 Matrix_1.6-0 DT_0.28
## [40] readxl_1.4.3 lubridate_1.9.2 forcats_1.0.0
## [43] stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1
## [46] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
## [49] ggplot2_3.4.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] graph_1.78.0 ica_1.0-3
## [3] plotly_4.10.2 Formula_1.2-5
## [5] zlibbioc_1.46.0 tidyselect_1.2.0
## [7] bit_4.0.5 doParallel_1.0.17
## [9] clue_0.3-64 rjson_0.2.21
## [11] blob_1.2.4 broom.helpers_1.13.0
## [13] S4Arrays_1.0.5 pbkrtest_0.5.2
## [15] parallel_4.3.2 Biobase_2.60.0
## [17] png_0.1-8 cli_3.6.3
## [19] ggplotify_0.1.1 bayestestR_0.13.1
## [21] askpass_1.1 openssl_2.1.0
## [23] goftest_1.2-3 broom.mixed_0.2.9.4
## [25] uwot_0.1.16 shadowtext_0.1.2
## [27] curl_5.0.1 mime_0.12
## [29] evaluate_0.21 tidytree_0.4.4
## [31] leiden_0.4.3 V8_4.3.3
## [33] stringi_1.7.12 backports_1.4.1
## [35] XML_3.99-0.14 httpuv_1.6.11
## [37] AnnotationDbi_1.62.2 magrittr_2.0.3
## [39] splines_4.3.2 org.Hs.eg.db_3.17.0
## [41] sctransform_0.3.5 ggbeeswarm_0.7.2
## [43] DBI_1.1.3 ggExtra_0.10.1
## [45] HDF5Array_1.28.1 jquerylib_0.1.4
## [47] withr_2.5.0 corpcor_1.6.10
## [49] enrichplot_1.20.0 lmtest_0.9-40
## [51] GSEABase_1.62.0 htmlwidgets_1.6.2
## [53] S4Vectors_0.38.1 SingleCellExperiment_1.22.0
## [55] ggvenn_0.1.10 labeling_0.4.2
## [57] cellranger_1.1.0 MatrixGenerics_1.12.3
## [59] annotate_1.78.0 reticulate_1.30
## [61] zoo_1.8-12 XVector_0.40.0
## [63] knitr_1.43 BiocGenerics_0.46.0
## [65] gt_0.9.0 timechange_0.2.0
## [67] foreach_1.5.2 fansi_1.0.4
## [69] data.table_1.14.8 ggtree_3.8.0
## [71] rhdf5_2.44.0 RSpectra_0.16-1
## [73] irlba_2.3.5.1 ggrastr_1.0.2
## [75] gridGraphics_0.5-1 commonmark_1.9.0
## [77] ellipsis_0.3.2 lazyeval_0.2.2
## [79] yaml_2.3.7 survival_3.5-7
## [81] scattermore_1.2 crayon_1.5.2
## [83] RcppAnnoy_0.0.21 progressr_0.13.0
## [85] tweenr_2.0.2 later_1.3.1
## [87] Rgraphviz_2.44.0 ggridges_0.5.4
## [89] codetools_0.2-19 base64enc_0.1-3
## [91] GlobalOptions_0.1.2 KEGGREST_1.40.0
## [93] ggfittext_0.10.1 Rtsne_0.16
## [95] shape_1.4.6 estimability_1.4.1
## [97] foreign_0.8-85 pkgconfig_2.0.3
## [99] KEGGgraph_1.60.0 xml2_1.3.5
## [101] GenomicRanges_1.52.0 IRanges_2.34.1
## [103] aplot_0.1.10 spatstat.sparse_3.0-2
## [105] ape_5.7-1 xtable_1.8-4
## [107] car_3.1-2 highr_0.10
## [109] plyr_1.8.8 polylabelr_0.2.0
## [111] httr_1.4.6 tools_4.3.2
## [113] globals_0.16.2 beeswarm_0.4.0
## [115] htmlTable_2.4.1 checkmate_2.2.0
## [117] nlme_3.1-163 HDO.db_0.99.1
## [119] digest_0.6.33 numDeriv_2016.8-1.1
## [121] furrr_0.3.1 farver_2.1.1
## [123] tzdb_0.4.0 reshape2_1.4.4
## [125] yulab.utils_0.0.6 rpart_4.1.21
## [127] glue_1.6.2 cachem_1.0.8
## [129] polyclip_1.10-4 Hmisc_5.1-0
## [131] generics_0.1.3 Biostrings_2.68.1
## [133] stats4_4.3.2 mvtnorm_1.2-2
## [135] parallelly_1.36.0 impute_1.74.1
## [137] ScaledMatrix_1.8.1 carData_3.0-5
## [139] minqa_1.2.5 pbapply_1.7-2
## [141] randomcoloR_1.1.0.1 SummarizedExperiment_1.30.2
## [143] vroom_1.6.3 gson_0.1.0
## [145] utf8_1.2.3 graphlayouts_1.0.0
## [147] datawizard_0.8.0 preprocessCore_1.62.1
## [149] ggsignif_0.6.4 shiny_1.7.4.1
## [151] GenomeInfoDbData_1.2.10 rhdf5filters_1.12.1
## [153] parameters_0.21.1 RCurl_1.98-1.12
## [155] memoise_2.0.1 rmarkdown_2.23
## [157] downloader_0.4 future_1.33.0
## [159] RANN_2.6.1 Cairo_1.6-1
## [161] spatstat.data_3.0-1 rstudioapi_0.15.0
## [163] cluster_2.1.4 janitor_2.2.0
## [165] spatstat.utils_3.0-3 hms_1.1.3
## [167] fitdistrplus_1.1-11 munsell_0.5.0
## [169] cowplot_1.1.1 colorspace_2.1-0
## [171] ellipse_0.5.0 rlang_1.1.1
## [173] GenomeInfoDb_1.36.1 DelayedMatrixStats_1.22.6
## [175] sparseMatrixStats_1.12.2 ggforce_0.4.1
## [177] mgcv_1.9-0 xfun_0.39
## [179] coda_0.19-4 iterators_1.0.14
## [181] matrixStats_1.0.0 rARPACK_0.11-0
## [183] abind_1.4-5 GOSemSim_2.26.1
## [185] treeio_1.25.2 Rhdf5lib_1.22.1
## [187] bitops_1.0-7 promises_1.2.0.1
## [189] scatterpie_0.2.1 RSQLite_2.3.1
## [191] qvalue_2.32.0 fgsea_1.26.0
## [193] DelayedArray_0.26.7 GO.db_3.17.0
## [195] compiler_4.3.2 beachmat_2.16.0
## [197] boot_1.3-28.1 listenv_0.9.0
## [199] Rcpp_1.0.11 BiocSingular_1.16.0
## [201] tensor_1.5 BiocParallel_1.34.2
## [203] insight_0.19.3 gridtext_0.1.5
## [205] spatstat.random_3.1-5 R6_2.5.1
## [207] fastmap_1.1.1 fastmatch_1.1-3
## [209] rstatix_0.7.2 vipor_0.4.5
## [211] ROCR_1.0-11 rsvd_1.0.5
## [213] nnet_7.3-19 gtable_0.3.3
## [215] KernSmooth_2.23-22 miniUI_0.1.1.1
## [217] deldir_1.0-9 htmltools_0.5.5
## [219] bit64_4.0.5 spatstat.explore_3.2-1
## [221] lifecycle_1.0.3 nloptr_2.0.3
## [223] sass_0.4.7 vctrs_0.6.3
## [225] spatstat.geom_3.2-4 snakecase_0.11.0
## [227] DOSE_3.26.1 ggfun_0.1.1
## [229] sp_2.0-0 future.apply_1.11.0
## [231] bslib_0.5.0 pillar_1.9.0
## [233] magick_2.8.1 jsonlite_1.8.7
## [235] markdown_1.7 GetoptLong_1.0.5
includes also saving data for supplementary
#eval=FALSE
source_rmd <- function(rmd_file){
knitr::knit(rmd_file, output = tempfile())
}
source_rmd("Make_my_figurepanels.Rmd")
## make one Pdf
library(qpdf)
qpdf::pdf_combine(
input = c("../Manuscript/Figure_1.pdf",
"../Manuscript/Figure_2.pdf",
"../Manuscript/Figure_3.pdf",
"../Manuscript/Figure_4.pdf",
"../Manuscript/Figure_5.pdf",
"../Manuscript/Figure_6.pdf",
"../Manuscript/Figure_7.pdf"),
output = "../Manuscript/Lautenbach_etal_mainfigures.pdf"
)
qpdf::pdf_combine(
input = c("../Manuscript/Figure_1_S1.pdf",
"../Manuscript/Figure_1_S2.pdf",
"../Manuscript/Figure_1_S3.pdf",
"../Manuscript/Figure_2_S4.pdf",
"../Manuscript/Figure_2_S5.pdf",
"../Manuscript/Figure_3_S6.pdf",
"../Manuscript/Figure_3_S7.pdf",
"../Manuscript/Figure_4_S8.pdf",
"../Manuscript/Figure_5_S9.pdf"
),
output = "../Manuscript/Lautenbach_etal_supplementaryfigures.pdf"
)