基础分析
library(tidyverse)
library(ggpubr)
library(ggthemes)
library(gmodels)
library(patchwork)
library(limma)
library(openxlsx)
library(pheatmap)
library(XGR) # BiocManager::install("hfang-bristol/XGR", dependencies=T)
library(PIONE) # BiocManager::install("hfang-bristol/PIONE", dependencies=T)
dir.create("./results/RNA",recursive = T)
#----------------------------------------------------------------------------------
# Step 1: Load the Data
#----------------------------------------------------------------------------------
rna <- readRDS("./data/20231113_RNA_log2.rds")
meta <- readRDS("./data/20231113_META.rds")
identical(colnames(rna),rownames(meta))
#----------------------------------------------------------------------------------
# Step 2: Boxplot
#----------------------------------------------------------------------------------
rna.long <- rna %>%
pivot_longer(everything(),names_to = "ID", values_to = "logFPKM") %>%
left_join(meta,by="ID")
p <- ggboxplot(rna.long, x="ID", y="logFPKM",
ylab="log2FPKM",xlab = "Sample") +
theme_base() +
theme(plot.background = element_blank())
ggsave("./results/RNA/1.Box.png",p,width = 8,height = 5)
#----------------------------------------------------------------------------------
# Step 3: PCA
#----------------------------------------------------------------------------------
pca.info <- fast.prcomp(rna)
pca.data <- data.frame(sample = rownames(pca.info$rotation),
Type = meta$Type,
pca.info$rotation)
p1 <- ggscatter(pca.data, x = "PC1", y = "PC2",
color = "Type",
ellipse = TRUE,
size = 3,
label = "sample" ,font.label = c(10, "plain"),
main = "PCA") + theme_base() +
theme(plot.background = element_blank())
p2 <- ggscatter(pca.data, x = "PC2", y = "PC3",
color = "Type",
ellipse = TRUE,
size = 3,
label = "sample" ,font.label = c(10, "plain"),
main = "PCA") + theme_base() +
theme(plot.background = element_blank())
p <- p1+p2+plot_layout(guides="collect")
ggsave("./results/RNA/2.PCA.png",p,width = 12,height = 5)
#----------------------------------------------------------------------------------
# Step 4: DEG
#----------------------------------------------------------------------------------
## limma
meta$contrast <- as.factor(meta$Type)
design <- model.matrix(~ 0 + contrast , data = meta)
fit <- lmFit(rna, design)
contrast <- makeContrasts( KO_WT = contrastKO - contrastWT ,
levels = design)
fits <- contrasts.fit(fit, contrast)
ebFit <- eBayes(fits)
## result
# KO_WT
res <- topTable(ebFit, coef = "KO_WT", adjust.method = 'fdr', number = Inf)
limma.res <- res %>% filter(!is.na(adj.P.Val)) %>%
mutate( logP = -log10(P.Value) ) %>%
mutate( contrast = "KO_WT") %>%
mutate( logFDR = -log10(adj.P.Val) ) %>%
mutate( GeneSymbol = rownames(res)) %>%
dplyr::select(GeneSymbol,everything()) %>%
as_tibble()
## cutoff: adj.P.Val < 0.05 && |FC| > 1.5
limma.res <- limma.res %>% mutate(group = case_when( adj.P.Val<0.05&logFC>0.58 ~ "up",
adj.P.Val<0.05&logFC< -0.58 ~ "down",
.default = "not sig"))
limma.res %>% count(group)
## output
write.xlsx( limma.res, "./results/RNA/3.Limma_fdr0.05_fc1.5.xlsx", overwrite = T, rowNames = F)
saveRDS(limma.res,"./results/RNA/3.Limma_fdr0.05_fc1.5.rds")
#----------------------------------------------------------------------------------
# Step 5: Volcano
#----------------------------------------------------------------------------------
## volcano
# KO_WT
pdata <- limma.res %>% mutate(group=factor(group,levels = c("up","down","not sig")))
my_label <- paste0( "Cutoff:FDR<0.05 & |FC|>1.5 ; \n" , "Up:",table(pdata$group)[1]," ; " ,"Down:" , table(pdata$group)[2])
# label top 20 sig genes
degs.1 <- limma.res %>% arrange(P.Value) %>% filter(group=="up") %>% slice(1:20) %>% pull(GeneSymbol)
degs.2 <- limma.res %>% arrange(P.Value) %>% filter(group=="down") %>% slice(1:20) %>% pull(GeneSymbol)
pdata <- pdata %>% mutate(label=case_when(GeneSymbol %in% c(degs.1,degs.2) ~ GeneSymbol,
.default = ""))
# plot
p <- ggscatter(pdata,
x = "logFC", y = "logFDR",
color = "group", size = 0.5,
main = paste0("KO vs WT") ,
xlab = "log2FoldChange", ylab = "-log10(FDR)",
palette = c("#CC0000","#339900","#444444"),
label = pdata$label,font.label = 10, repel = T,
xlim = c(-10, 10)
)+
theme_base()+
geom_hline(yintercept = -log10(0.05), linetype="dashed", color = "#222222") +
geom_vline(xintercept = log2(1.5) , linetype="dashed", color = "#222222")+
geom_vline(xintercept = -log2(1.5) , linetype="dashed", color = "#222222")+
labs(subtitle = my_label) +
theme(plot.background = element_blank())
ggsave("./results/RNA/4.Volcano.png", p, width = 8, height = 8)
ggsave("./results/RNA/4.Volcano.pdf", p, width = 8, height = 8)
#----------------------------------------------------------------------------------
# Step 6: GO Enrichment
#----------------------------------------------------------------------------------
placeholder <- "http://www.comptransmed.pro/bigdata_ctm"
## Gene ontology
#sets <- tibble(onto=c('GOBP','GOCC','GOMF')) %>%
# mutate(set=map(onto,~oRDS(str_c("org.Mm.eg",.x),placeholder=placeholder))) ## 小鼠
sets <- tibble(onto=c('GOBP','GOCC','GOMF')) %>% # KEGG
mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder))) ## 人
## KO_WT
# DE genes
deg_vec <- limma.res %>% filter(group!="not sig") %>% pull(GeneSymbol) %>% unique()
# enrichment
esad <- oSEAadv(deg_vec, sets, size.range=c(15,1500), test="fisher", min.overlap=5)
df_eTerm <- esad %>% oSEAextract() %>%
filter(adjp<5e-2, distance==3) %>%
mutate(group=namespace) %>% group_by(group) %>% top_n(100,-adjp) %>%
arrange(group,adjp)
# output
df_eTerm %>% openxlsx::write.xlsx("./results/RNA/5.Enrichment_GO_KO_WT.xlsx")
# plot : oSEAballoon
gp <- df_eTerm %>% oSEAballoon(top=20, adjp.cutoff=0.05, zlim=NULL, slim=c(0,100), size.range=c(0.5,2), shape=19, colormap="grey90-orange-darkred")
ggsave("./results/RNA/5.Enrichment_GO_KO_WT.png", gp, width=5, height=5)
进阶分析
#----------------------------------------------------------------------------------
# Step 7: KEGG pathway Crosstalk analysis
#----------------------------------------------------------------------------------
## define KEGG-merged gene interaction network
placeholder <- "http://www.comptransmed.pro/bigdata_ctm"
ig.KEGG.category <- oRDS('ig.KEGG.merged', placeholder=placeholder)
## input : genes , p-value
data.net <- limma.res %>% dplyr::select(GeneSymbol,P.Value)
## maximum-scoring subnetwork
subg <- xSubneterGenes(data.net,network.customised=ig.KEGG.category, subnet.size=30)
# result
subg <- subg %>% xLayout("layout_with_kk")
df_subg <- tibble(Symbol=V(subg)$name) %>% inner_join(limma.res, by=c('Symbol'='GeneSymbol'))
V(subg)$logP <- df_subg$logP
V(subg)$logFDR <- -log10(df_subg$adj.P.Val)
V(subg)$logFC <- df_subg$logFC
# output
out <- igraph::as_data_frame(subg, what="vertices") %>% as_tibble() %>% arrange(-logP) %>% mutate(gene=name)
write.xlsx(out,"./results/RNA/6.KEGG_subg.KO_WT.xlsx",overwrite = T)
saveRDS(subg,file = "./results/RNA/6.KEGG_subg.KO_WT.rds")
# plot
gg_subg <- xGGnetwork(subg,
node.label='name', node.label.size=3,
node.label.color='black', node.label.alpha=1,
node.label.padding=0.1, node.label.arrow=0,
node.label.force=0.1,
node.xcoord="xcoord", node.ycoord="ycoord",
node.color="logFC", colormap="spectral",
zlim=c(-3,3), node.color.title='log2FC',
node.color.alpha=0.8, node.size="logP", node.size.range=c(1,10),
slim=c(1,6), node.size.title="-log10(Pvalue)", edge.color="steelblue1",
edge.color.alpha=0.5,edge.curve=0,edge.arrow.gap=0.01,
title=paste0("Maximum-scoring KEGG subnetwork in KO vs WT"))
ggsave("./results/RNA/6.KEGG_subg.KO_WT.png", gg_subg, width=6, height=5)
ggsave("./results/RNA/6.KEGG_subg.KO_WT.pdf", gg_subg, width=6, height=5)
## network KEGG pathway enrichment
placeholder <- "http://www.comptransmed.pro/bigdata_ctm"
sets <- tibble(onto=c('KEGG')) %>%
mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
subg.node <- vertex_attr(subg)$name
subg.enrich <- oSEAadv(subg.node,sets, size.range=c(15,1500), test="fisher", min.overlap=5)
subg.enrich.df <- subg.enrich %>% oSEAextract() %>%
mutate(group=namespace) %>%
arrange(adjp) %>%
filter(distance==3) %>%
dplyr::select(name,adjp,distance,everything())
write.xlsx(subg.enrich.df,"./results/RNA/7.KEGG_subg.KO_WT_enrichment.xlsx")
name
|
adjp
|
group
|
nO
|
PI3K-Akt signaling pathway
|
0.0e+00
|
Environmental Process
|
15
|
MAPK signaling pathway
|
0.0e+00
|
Environmental Process
|
12
|
Pathways in cancer
|
0.0e+00
|
Human Disease
|
14
|
Ras signaling pathway
|
1.0e-07
|
Environmental Process
|
10
|
Leishmaniasis
|
2.4e-06
|
Human Disease
|
6
|
ECM-receptor interaction
|
4.5e-06
|
Environmental Process
|
6
|