Chapter 6 TCGA PanCanAtlas

The p53 signature genes were defined as follows.

p53 targets: The 116 p53 targets.

WT-up gene: in at least five cohorts, expression of this gene in samples harboring wild-type p53 is significantly higher than in samples harboring highly frequent p53 mutations (significance: fold change > 1.5 and adjusted P < 0.05; highly frequent mutations are those that reliably inactivate wild-type p53 functions: the missense/frameshift/insertion/deletion/stop-gain/splicing mutations with > 0.5% frequency).

MUT-up gene: in at least five cohorts, expression of this gene in samples harboring highly frequent p53 missense mutations is significantly higher than in samples harboring wild-type p53 (significance: fold change > 1.5 and adjusted P < 0.05; highly frequent missense mutations are those most likely to possess gain-of-function (GOF) activity: the missense mutations with > 0.5% frequency).

6.1 WT-up genes

# DEGs between p53 mut (top mutations) and p53 wt
#
# For every cohort in TCGA.ALL the samples are labelled
#   c1: TP53mut == "No" and WithMutation == "Yes"
#   c2: TP53mut == "Yes" and carrying one of the mutations in tp53.mut.topall
# and the limma contrast c1 - c2 is tested gene by gene. Genes with
# adj.P < 0.05 and |logFC| > 0.58 are kept (group "up" / "down") and stacked
# over all cohorts into total.deg.

enroll.data <- tcga.mutation.tp53[tcga.mutation.tp53$Amino_Acid_Change %in% tp53.mut.topall$Mutation, ]

# The expression matrix is subset by position below, so its columns have to be
# in the same order as the rows of the sample annotation.
stopifnot(ncol(tcga.fpkm) == nrow(tcga.meta),
          all(as.character(tcga.meta$sample) == colnames(tcga.fpkm)))

deg.per.cohort <- vector("list", length(TCGA.ALL))
for (kkk in seq_along(TCGA.ALL)) {
  pdata <- tcga.meta

  pdata$contrast <- "other"
  pdata$contrast[which(pdata$TP53mut == "Yes" & pdata$project_id == TCGA.ALL[kkk] & pdata$sample %in% enroll.data$Sample_ID)] <- "c2"
  pdata$contrast[which(pdata$TP53mut == "No" & pdata$project_id == TCGA.ALL[kkk] & pdata$WithMutation == "Yes")] <- "c1"

  n.c1 <- sum(pdata$contrast %in% c("c1"))
  n.c2 <- sum(pdata$contrast %in% c("c2"))
  message(kkk, "  ", TCGA.ALL[kkk], "   ", n.c1, "   ", n.c2)

  # A cohort is only testable when both groups are present.
  if (n.c1 > 0 && n.c2 > 0) {
    keep <- which(pdata$contrast != "other")
    pdata.sub <- pdata[keep, ]
    design <- model.matrix(~ 0 + as.factor(contrast), data = pdata.sub)
    colnames(design) <- str_replace_all(colnames(design), fixed("as.factor(contrast)"), "")
    contrast <- makeContrasts(c1_c2 = c1 - c2, levels = design)

    # NOTE(review): the model fit was missing from this chunk; `fit` has to be
    # recomputed for each cohort-specific design. Reconstructed on the
    # assumption that tcga.fpkm is the gene x sample expression matrix on the
    # scale intended for limma - confirm.
    fit <- lmFit(tcga.fpkm[, keep], design)
    fits <- contrasts.fit(fit, contrast)
    ebFit <- eBayes(fits)
    deg_sig_list <- topTable(ebFit, coef = 1, adjust.method = 'fdr', number = Inf)

    deg.data <- deg_sig_list[which(!is.na(deg_sig_list$P.Value)), ]
    deg.data$logP <- -log10(deg.data$P.Value)
    deg.data$logFDR <- -log10(deg.data$adj.P.Val)
    # "zz" marks genes that fail the thresholds; they are dropped below.
    deg.data$group <- "zz"
    deg.data$group[which( (deg.data$adj.P.Val < 0.05) & (deg.data$logFC > 0.58) )] <- "up"
    deg.data$group[which( (deg.data$adj.P.Val < 0.05) & (deg.data$logFC < -0.58) )] <- "down"
    deg.data$tag <- "TP53 wt -vs- mut"
    deg.data$Gene <- rownames(deg.data)
    deg.data$Cohort <- TCGA.ALL[kkk]
    deg.per.cohort[[kkk]] <- deg.data[which(deg.data$group != "zz"), ]
  }
}
# Bind once at the end instead of growing the data frame inside the loop.
total.deg <- do.call(rbind, deg.per.cohort)

total.deg$Cohort <- factor(as.character(total.deg$Cohort), levels = TCGA.ALL)
saveRDS(total.deg, "02.tcga/degs_tp53_wtup_top.rds")

total.deg.wtup <- readRDS("02.tcga/degs_tp53_wtup_top.rds")

# The saved table keeps both "up" and "down" DEGs of the wt - mut contrast.
# A WT-up gene is defined as higher in wild-type samples, so only the "up"
# rows may count towards the per-gene cohort tally.
# NOTE(review): the original tabulated both directions; the counts printed in
# the rendered output may change once only "up" rows are counted.
wtup.only <- total.deg.wtup[which(total.deg.wtup$group == "up"), ]

# Gene x cohort incidence table, genes ordered by the number of cohorts.
tp53.wtup.top <- table(wtup.only[, c("Gene", "Cohort")])
tp53.wtup.top <- tp53.wtup.top[order(rowSums(tp53.wtup.top), decreasing = TRUE), ]
dim(tp53.wtup.top)

## [1] 5694   33

# For k = 1..5: number of genes listed in tp53.wtup.top for at least k cohorts.
plot.data <- data.frame(
  Var1 = 1:5,
  Freq = vapply(1:5,
                function(k) sum(rowSums(tp53.wtup.top) >= k),
                integer(1))
)

# Bar chart of those counts, labelled with the value above each bar.
p <- ggbarplot(
  plot.data,
  x = "Var1", y = "Freq",
  color = "black", fill = "black", size = 0,
  label = TRUE, lab.pos = c("out"), lab.size = 4,
  lab.vjust = -0.5,
  lab.hjust = 0.5,
  main = "The p53 WT-up genes",
  xlab = "", ylab = "Gene Count"
) +
  theme_base() +
  theme(plot.background = element_blank())
p

# Genes found in at least five cohorts. The names are taken from the row names
# directly: subsetting the table first would drop it to a plain vector (and
# lose the gene name) whenever exactly one gene qualifies.
gene.tp53.wtup.top <- rownames(tp53.wtup.top)[rowSums(tp53.wtup.top) >= 5]
length(gene.tp53.wtup.top)

## [1] 157

6.2 MUT-up genes

# DEGs between p53 mut (top missense mutations) and p53 wt
#
# For every cohort in TCGA.ALL the samples are labelled
#   c1: TP53mut == "Yes" and carrying one of the mutations in tp53.mut.topmis
#   c2: TP53mut == "No" and WithMutation == "Yes"
# and the limma contrast c1 - c2 is tested gene by gene. Genes with
# adj.P < 0.05 and |logFC| > 0.58 are kept (group "up" / "down") and stacked
# over all cohorts into total.deg.

enroll.data <- tcga.mutation.tp53[tcga.mutation.tp53$Amino_Acid_Change %in% tp53.mut.topmis$Mutation, ]
table(enroll.data$Class)

# The expression matrix is subset by position below, so its columns have to be
# in the same order as the rows of the sample annotation.
stopifnot(ncol(tcga.fpkm) == nrow(tcga.meta),
          all(as.character(tcga.meta$sample) == colnames(tcga.fpkm)))

deg.per.cohort <- vector("list", length(TCGA.ALL))
for (kkk in seq_along(TCGA.ALL)) {
  pdata <- tcga.meta

  pdata$contrast <- "other"
  pdata$contrast[which(pdata$TP53mut == "Yes" & pdata$project_id == TCGA.ALL[kkk] & pdata$sample %in% enroll.data$Sample_ID)] <- "c1"
  pdata$contrast[which(pdata$TP53mut == "No" & pdata$project_id == TCGA.ALL[kkk] & pdata$WithMutation == "Yes")] <- "c2"

  n.c1 <- sum(pdata$contrast %in% c("c1"))
  n.c2 <- sum(pdata$contrast %in% c("c2"))
  message(kkk, "  ", TCGA.ALL[kkk], "   ", n.c1, "   ", n.c2)

  # A cohort is only testable when both groups are present.
  if (n.c1 > 0 && n.c2 > 0) {
    keep <- which(pdata$contrast != "other")
    pdata.sub <- pdata[keep, ]
    design <- model.matrix(~ 0 + as.factor(contrast), data = pdata.sub)
    colnames(design) <- str_replace_all(colnames(design), fixed("as.factor(contrast)"), "")
    contrast <- makeContrasts(c1_c2 = c1 - c2, levels = design)

    # NOTE(review): the model fit was missing from this chunk; `fit` has to be
    # recomputed for each cohort-specific design. Reconstructed on the
    # assumption that tcga.fpkm is the gene x sample expression matrix on the
    # scale intended for limma - confirm.
    fit <- lmFit(tcga.fpkm[, keep], design)
    fits <- contrasts.fit(fit, contrast)
    ebFit <- eBayes(fits)
    deg_sig_list <- topTable(ebFit, coef = 1, adjust.method = 'fdr', number = Inf)

    deg.data <- deg_sig_list[which(!is.na(deg_sig_list$P.Value)), ]
    deg.data$logP <- -log10(deg.data$P.Value)
    deg.data$logFDR <- -log10(deg.data$adj.P.Val)
    # "zz" marks genes that fail the thresholds; they are dropped below.
    deg.data$group <- "zz"
    deg.data$group[which( (deg.data$adj.P.Val < 0.05) & (deg.data$logFC > 0.58) )] <- "up"
    deg.data$group[which( (deg.data$adj.P.Val < 0.05) & (deg.data$logFC < -0.58) )] <- "down"
    deg.data$tag <- "TP53 mut -vs- wt"
    deg.data$Gene <- rownames(deg.data)
    deg.data$Cohort <- TCGA.ALL[kkk]
    deg.per.cohort[[kkk]] <- deg.data[which(deg.data$group != "zz"), ]
  }
}
# Bind once at the end instead of growing the data frame inside the loop.
total.deg <- do.call(rbind, deg.per.cohort)

total.deg$Cohort <- factor(as.character(total.deg$Cohort), levels = TCGA.ALL)
# Saved under the name the following step reads back
# ("degs_tp53_misup_top.rds"); the previous "mutup" name was never read.
saveRDS(total.deg, "02.tcga/degs_tp53_misup_top.rds")

total.deg.mutup <- readRDS("02.tcga/degs_tp53_misup_top.rds")

# The saved table keeps both "up" and "down" DEGs of the mut - wt contrast.
# A MUT-up gene is defined as higher in mutant samples, so only the "up" rows
# may count towards the per-gene cohort tally.
# NOTE(review): the original tabulated both directions; the counts printed in
# the rendered output may change once only "up" rows are counted.
mutup.only <- total.deg.mutup[which(total.deg.mutup$group == "up"), ]

# Gene x cohort incidence table, genes ordered by the number of cohorts.
tp53.mutup.top <- table(mutup.only[, c("Gene", "Cohort")])
tp53.mutup.top <- tp53.mutup.top[order(rowSums(tp53.mutup.top), decreasing = TRUE), ]
dim(tp53.mutup.top)

## [1] 5498   33

# For k = 1..5: number of genes listed in tp53.mutup.top for at least k cohorts.
plot.data <- data.frame(
  Var1 = 1:5,
  Freq = vapply(1:5,
                function(k) sum(rowSums(tp53.mutup.top) >= k),
                integer(1))
)

# Bar chart of those counts. The title previously read "The p53 WT-up genes",
# a leftover from the WT-up section; this plot shows the MUT-up tally.
p <- ggbarplot(plot.data, x = "Var1", y = "Freq",
               color = "black", fill = "black", size = 0,
               label = TRUE, lab.pos = c("out"), lab.size = 4,
               lab.vjust = -0.5,
               lab.hjust = 0.5,
               main = "The p53 MUT-up genes",
               xlab = "", ylab = "Gene Count")
p <- p + theme_base() + theme(plot.background = element_blank())
p

# Genes found in at least five cohorts. The names are taken from the row names
# directly: subsetting the table first would drop it to a plain vector (and
# lose the gene name) whenever exactly one gene qualifies.
gene.tp53.mutup.top <- rownames(tp53.mutup.top)[rowSums(tp53.mutup.top) >= 5]
length(gene.tp53.mutup.top)

## [1] 140

6.3 HR of high expression

HR of high expression of p53 signature genes among TCGA cancer types.

6.3.1 116 p53 targets

HR of high expression of the 116 p53 targets among 33 TCGA cancer cohorts. In each cohort, patients are separated by the median value of gene expression, followed by HR values calculation using the univariable Cox regression analysis.

# The 33 TCGA cohort identifiers; they name the columns of the matrices built
# from this table.
TCGA.ALL <- c(
  "TCGA-OV", "TCGA-UCS", "TCGA-LUSC", "TCGA-ESCA", "TCGA-READ", "TCGA-HNSC",
  "TCGA-PAAD", "TCGA-COAD", "TCGA-LUAD", "TCGA-BLCA", "TCGA-STAD", "TCGA-LGG",
  "TCGA-UCEC", "TCGA-SARC", "TCGA-BRCA", "TCGA-GBM", "TCGA-LIHC", "TCGA-KICH",
  "TCGA-ACC", "TCGA-MESO", "TCGA-DLBC", "TCGA-PRAD", "TCGA-CHOL", "TCGA-SKCM",
  "TCGA-CESC", "TCGA-LAML", "TCGA-THYM", "TCGA-KIRC", "TCGA-KIRP", "TCGA-TGCT",
  "TCGA-THCA", "TCGA-PCPG", "TCGA-UVM"
)

# Columns 1-7 of the sheet hold the p53-target panel; rows without a cohort
# label are discarded.
plot.data <- read.xlsx("02.tcga/Table6.xlsx", sheet = 1, startRow = 2)[, 1:7]
plot.data <- plot.data[!is.na(plot.data$TCGA.Cohort), ]

gene.list <- unique(plot.data[["p53.targets"]])
length(gene.list)

## [1] 116

# Gene x cohort matrices filled from the Cox results in plot.data:
#   plot.mat    :  1 = P < 0.05 and HR > 1, -1 = P < 0.05 and HR < 1, else 0
#   plot.mat.hr :  log2(HR) where P < 0.05, else 0
plot.mat <- matrix(0, nrow = length(gene.list), ncol = length(TCGA.ALL),
                   dimnames = list(gene.list, TCGA.ALL))
plot.mat.hr <- plot.mat
for (i in seq_len(ncol(plot.mat))) {
  sub <- plot.data[which(plot.data$TCGA.Cohort == colnames(plot.mat)[i]), ]
  sub <- sub[which(sub$P.value < 0.05), ]
  # `sub` is already restricted to P < 0.05, so only the HR direction is tested.
  row.id <- match(sub$p53.targets, rownames(plot.mat))
  plot.mat.hr[row.id, i] <- log2(sub$HR)
  plot.mat[row.id[which(sub$HR > 1)], i] <- 1
  plot.mat[row.id[which(sub$HR < 1)], i] <- -1
}

# Per-gene and per-cohort summaries used only to order the heatmap:
# SigCount = (# cells equal to -1) - (# cells equal to 1), SigHR = mean of -log2(HR).
# stringsAsFactors = FALSE keeps the name columns as character, so they index
# the matrix by name (a factor would index by its integer codes).
stat.gene <- data.frame(Gene = rownames(plot.mat),
                        SigCount = rowSums(plot.mat == -1) - rowSums(plot.mat == 1),
                        SigHR = rowMeans(plot.mat.hr * -1),
                        stringsAsFactors = FALSE)
stat.gene <- stat.gene[order(stat.gene$SigCount, stat.gene$SigHR, decreasing = TRUE), ]

stat.cohort <- data.frame(Cohort = colnames(plot.mat),
                          SigCount = colSums(plot.mat == -1) - colSums(plot.mat == 1),
                          SigHR = colMeans(plot.mat.hr * -1),
                          stringsAsFactors = FALSE)
stat.cohort <- stat.cohort[order(stat.cohort$SigCount, stat.cohort$SigHR, decreasing = TRUE), ]

# Three-colour heatmap of the -1 / 0 / 1 matrix in the order computed above;
# clustering is switched off so that this order is kept.
p <- pheatmap(plot.mat[stat.gene$Gene, stat.cohort$Cohort], scale = "none",
              color = colorRampPalette(c("#00599F","#EEEEEE","#D01910"))(3),
              cluster_rows = FALSE, cluster_cols = FALSE, border_color = NA,
              fontsize_col = 10,
              fontsize_row = 2)
p

6.3.2 WT-up genes

HR of high expression of the 157 WT-up genes among 33 TCGA cancer cohorts. In each cohort, patients are separated by the median value of gene expression, followed by HR values calculation using the univariable Cox regression analysis.

# Columns 8-14 of the sheet hold the WT-up panel; rows without a cohort label
# are discarded.
plot.data <- read.xlsx("02.tcga/Table6.xlsx", sheet = 1, startRow = 2)[, 8:14]
plot.data <- plot.data[!is.na(plot.data$TCGA.Cohort), ]

gene.list <- unique(plot.data[["WT-up.genes"]])
length(gene.list)

## [1] 157

# Gene x cohort matrices filled from the Cox results in plot.data:
#   plot.mat    :  1 = P < 0.05 and HR > 1, -1 = P < 0.05 and HR < 1, else 0
#   plot.mat.hr :  log2(HR) where P < 0.05, else 0
plot.mat <- matrix(0, nrow = length(gene.list), ncol = length(TCGA.ALL),
                   dimnames = list(gene.list, TCGA.ALL))
plot.mat.hr <- plot.mat
for (i in seq_len(ncol(plot.mat))) {
  sub <- plot.data[which(plot.data$TCGA.Cohort == colnames(plot.mat)[i]), ]
  sub <- sub[which(sub$P.value < 0.05), ]
  # `sub` is already restricted to P < 0.05, so only the HR direction is tested.
  row.id <- match(sub$`WT-up.genes`, rownames(plot.mat))
  plot.mat.hr[row.id, i] <- log2(sub$HR)
  plot.mat[row.id[which(sub$HR > 1)], i] <- 1
  plot.mat[row.id[which(sub$HR < 1)], i] <- -1
}

# Per-gene and per-cohort summaries used only to order the heatmap:
# SigCount = (# cells equal to -1) - (# cells equal to 1), SigHR = mean of -log2(HR).
# stringsAsFactors = FALSE keeps the name columns as character, so they index
# the matrix by name (a factor would index by its integer codes).
stat.gene <- data.frame(Gene = rownames(plot.mat),
                        SigCount = rowSums(plot.mat == -1) - rowSums(plot.mat == 1),
                        SigHR = rowMeans(plot.mat.hr * -1),
                        stringsAsFactors = FALSE)
stat.gene <- stat.gene[order(stat.gene$SigCount, stat.gene$SigHR, decreasing = TRUE), ]

stat.cohort <- data.frame(Cohort = colnames(plot.mat),
                          SigCount = colSums(plot.mat == -1) - colSums(plot.mat == 1),
                          SigHR = colMeans(plot.mat.hr * -1),
                          stringsAsFactors = FALSE)
stat.cohort <- stat.cohort[order(stat.cohort$SigCount, stat.cohort$SigHR, decreasing = TRUE), ]

# Three-colour heatmap of the -1 / 0 / 1 matrix in the order computed above;
# clustering is switched off so that this order is kept.
p <- pheatmap(plot.mat[stat.gene$Gene, stat.cohort$Cohort], scale = "none",
              color = colorRampPalette(c("#00599F","#EEEEEE","#D01910"))(3),
              cluster_rows = FALSE, cluster_cols = FALSE, border_color = NA,
              fontsize_col = 10,
              fontsize_row = 2)
p

6.3.3 MUT-up genes

HR of high expression of the 140 MUT-up genes among 33 TCGA cancer cohorts. In each cohort, patients are separated by the median value of gene expression, followed by HR values calculation using the univariable Cox regression analysis.

# Columns 15-21 of the sheet hold the MUT-up panel; rows without a cohort label
# are discarded.
plot.data <- read.xlsx("02.tcga/Table6.xlsx", sheet = 1, startRow = 2)[, 15:21]
plot.data <- plot.data[!is.na(plot.data$TCGA.Cohort), ]

gene.list <- unique(plot.data[["MUT-up.genes"]])
length(gene.list)

## [1] 140

# Gene x cohort matrices filled from the Cox results in plot.data:
#   plot.mat    :  1 = P < 0.05 and HR > 1, -1 = P < 0.05 and HR < 1, else 0
#   plot.mat.hr :  log2(HR) where P < 0.05, else 0
plot.mat <- matrix(0, nrow = length(gene.list), ncol = length(TCGA.ALL),
                   dimnames = list(gene.list, TCGA.ALL))
plot.mat.hr <- plot.mat
for (i in seq_len(ncol(plot.mat))) {
  sub <- plot.data[which(plot.data$TCGA.Cohort == colnames(plot.mat)[i]), ]
  sub <- sub[which(sub$P.value < 0.05), ]
  # `sub` is already restricted to P < 0.05, so only the HR direction is tested.
  row.id <- match(sub$`MUT-up.genes`, rownames(plot.mat))
  plot.mat.hr[row.id, i] <- log2(sub$HR)
  plot.mat[row.id[which(sub$HR > 1)], i] <- 1
  plot.mat[row.id[which(sub$HR < 1)], i] <- -1
}

# Per-gene and per-cohort summaries used only to order the heatmap. Unlike the
# p53-target and WT-up panels, the sign is not flipped here:
# SigCount = (# cells equal to 1) - (# cells equal to -1), SigHR = mean of log2(HR).
# stringsAsFactors = FALSE keeps the name columns as character, so they index
# the matrix by name (a factor would index by its integer codes).
stat.gene <- data.frame(Gene = rownames(plot.mat),
                        SigCount = rowSums(plot.mat == 1) - rowSums(plot.mat == -1),
                        SigHR = rowMeans(plot.mat.hr * 1),
                        stringsAsFactors = FALSE)
stat.gene <- stat.gene[order(stat.gene$SigCount, stat.gene$SigHR, decreasing = TRUE), ]

stat.cohort <- data.frame(Cohort = colnames(plot.mat),
                          SigCount = colSums(plot.mat == 1) - colSums(plot.mat == -1),
                          SigHR = colMeans(plot.mat.hr * 1),
                          stringsAsFactors = FALSE)
stat.cohort <- stat.cohort[order(stat.cohort$SigCount, stat.cohort$SigHR, decreasing = TRUE), ]

# Three-colour heatmap of the -1 / 0 / 1 matrix in the order computed above;
# clustering is switched off so that this order is kept.
p <- pheatmap(plot.mat[stat.gene$Gene, stat.cohort$Cohort], scale = "none",
              color = colorRampPalette(c("#00599F","#EEEEEE","#D01910"))(3),
              cluster_rows = FALSE, cluster_cols = FALSE, border_color = NA,
              fontsize_col = 10,
              fontsize_row = 2)
p

6.4 Powerfulness in regulating

Heatmap of fold changes of expression of the p53 signature genes between p53-wild-type and p53-mutant tumors in the TCGA Pan-Cancer Atlas.

6.4.1 116 p53 targets

Heatmap of fold changes of expression of the 116 reported p53 target genes between samples with wild-type p53 and samples with frequent mutant p53 (> 0.5% mutation frequency). Note that the p53 mutants with > 0.5% mutation frequency are the ones most likely to have lost wild-type functions.

# The 33 TCGA cohort identifiers; they name the columns of the matrix built
# from this table.
TCGA.ALL <- c(
  "TCGA-OV", "TCGA-UCS", "TCGA-LUSC", "TCGA-ESCA", "TCGA-READ", "TCGA-HNSC",
  "TCGA-PAAD", "TCGA-COAD", "TCGA-LUAD", "TCGA-BLCA", "TCGA-STAD", "TCGA-LGG",
  "TCGA-UCEC", "TCGA-SARC", "TCGA-BRCA", "TCGA-GBM", "TCGA-LIHC", "TCGA-KICH",
  "TCGA-ACC", "TCGA-MESO", "TCGA-DLBC", "TCGA-PRAD", "TCGA-CHOL", "TCGA-SKCM",
  "TCGA-CESC", "TCGA-LAML", "TCGA-THYM", "TCGA-KIRC", "TCGA-KIRP", "TCGA-TGCT",
  "TCGA-THCA", "TCGA-PCPG", "TCGA-UVM"
)

# Columns 1-6 of the sheet hold the p53-target panel; rows without a cohort
# label are discarded.
plot.data <- read.xlsx("02.tcga/Table7.xlsx", sheet = 1, startRow = 2)[, 1:6]
plot.data <- plot.data[!is.na(plot.data$TCGA.Cohort), ]

gene.list <- unique(plot.data[["p53.targets"]])
length(gene.list)

## [1] 116

# Gene x cohort matrix of fold-change categories:
#   NA = cohort has no rows in plot.data
#   0  = log2FC <= 0 (or gene not listed for the cohort)
#   1  = log2FC > 0
#   2  = log2FC > 0.58 and adj.P.Val < 0.05
plot.mat <- matrix(NA, nrow = length(gene.list), ncol = length(TCGA.ALL),
                   dimnames = list(gene.list, TCGA.ALL))
for (i in seq_len(ncol(plot.mat))) {
  sub <- plot.data[which(plot.data$TCGA.Cohort == colnames(plot.mat)[i]), ]
  if (nrow(sub) > 0) {
    plot.mat[, i] <- 0
    # Category 2 is assigned after category 1 so that it overrides it.
    plot.mat[sub$p53.targets[which(sub$log2FC > 0)], i] <- 1
    plot.mat[sub$p53.targets[which(sub$log2FC > 0.58 & sub$adj.P.Val < 0.05)], i] <- 2
  }
}

# Order cohorts by the number of category-2 genes, then category-1 genes.
# stringsAsFactors = FALSE keeps Cohort as character so that it selects matrix
# columns by name (a factor would select by its integer codes).
stat.cohort <- data.frame(Cohort = colnames(plot.mat),
                          SigCount = colSums(plot.mat == 2),
                          NotSigCount = colSums(plot.mat == 1),
                          stringsAsFactors = FALSE)
stat.cohort <- stat.cohort[order(stat.cohort$SigCount, stat.cohort$NotSigCount, decreasing = TRUE), ]

plot.mat <- plot.mat[, stat.cohort$Cohort]
# Clustering is switched off, so the clustering_* settings below are inert and
# the rows/columns are drawn in the order of plot.mat.
p <- pheatmap(plot.mat, scale = "none",
              color = colorRampPalette(c("#fffef3","#edd1ff","#984ac9"))(501),
              cluster_rows = FALSE, cluster_cols = FALSE, border_color = NA,
              clustering_method = "ward.D2",
              display_numbers = FALSE,
              number_format = "%.f", number_color = "black",
              clustering_distance_rows = "manhattan",
              clustering_distance_cols = "manhattan",
              fontsize_col = 10,
              fontsize_row = 1)
p

6.4.2 WT-up genes

Heatmap of fold changes of expression of the 157 WT-up genes between samples with wild-type p53 and samples with frequent mutant p53 (> 0.5% mutation frequency). Note that the p53 mutants with > 0.5% mutation frequency are the ones most likely to have lost wild-type functions.

# Columns 7-12 of the sheet hold the WT-up panel; rows without a cohort label
# are discarded.
plot.data <- read.xlsx("02.tcga/Table7.xlsx", sheet = 1, startRow = 2)[, 7:12]
plot.data <- plot.data[!is.na(plot.data$TCGA.Cohort), ]

gene.list <- unique(plot.data[["WT-up.genes"]])
length(gene.list)

## [1] 157

# Gene x cohort matrix of fold-change categories:
#   NA = cohort has no rows in plot.data
#   0  = log2FC <= 0 (or gene not listed for the cohort)
#   1  = log2FC > 0
#   2  = log2FC > 0.58 and adj.P.Val < 0.05
plot.mat <- matrix(NA, nrow = length(gene.list), ncol = length(TCGA.ALL),
                   dimnames = list(gene.list, TCGA.ALL))
for (i in seq_len(ncol(plot.mat))) {
  sub <- plot.data[which(plot.data$TCGA.Cohort == colnames(plot.mat)[i]), ]
  if (nrow(sub) > 0) {
    plot.mat[, i] <- 0
    # Category 2 is assigned after category 1 so that it overrides it.
    plot.mat[sub$`WT-up.genes`[which(sub$log2FC > 0)], i] <- 1
    plot.mat[sub$`WT-up.genes`[which(sub$log2FC > 0.58 & sub$adj.P.Val < 0.05)], i] <- 2
  }
}

# Order cohorts by the number of category-2 genes, then category-1 genes.
# stringsAsFactors = FALSE keeps Cohort as character so that it selects matrix
# columns by name (a factor would select by its integer codes).
stat.cohort <- data.frame(Cohort = colnames(plot.mat),
                          SigCount = colSums(plot.mat == 2),
                          NotSigCount = colSums(plot.mat == 1),
                          stringsAsFactors = FALSE)
stat.cohort <- stat.cohort[order(stat.cohort$SigCount, stat.cohort$NotSigCount, decreasing = TRUE), ]

plot.mat <- plot.mat[, stat.cohort$Cohort]
# Clustering is switched off, so the clustering_* settings below are inert and
# the rows/columns are drawn in the order of plot.mat.
p <- pheatmap(plot.mat, scale = "none",
              color = colorRampPalette(c("#fffef3","#edd1ff","#984ac9"))(501),
              cluster_rows = FALSE, cluster_cols = FALSE, border_color = NA,
              clustering_method = "ward.D2",
              display_numbers = FALSE,
              number_format = "%.f", number_color = "black",
              clustering_distance_rows = "manhattan",
              clustering_distance_cols = "manhattan",
              fontsize_col = 10,
              fontsize_row = 1)
p

6.4.3 MUT-up genes

Heatmap of fold changes of expression of the 140 MUT-up genes between samples with frequent missense mutant p53 (> 0.5% mutation frequency) and samples with wild-type p53. Note that the frequent missense p53 mutants with > 0.5% mutation frequency are the ones most likely to possess gain-of-function (GOF) activity.

# Columns 13-18 of the sheet hold the MUT-up panel; rows without a cohort label
# are discarded.
plot.data <- read.xlsx("02.tcga/Table7.xlsx", sheet = 1, startRow = 2)[, 13:18]
plot.data <- plot.data[!is.na(plot.data$TCGA.Cohort), ]

gene.list <- unique(plot.data[["MUT-up.genes"]])
length(gene.list)

## [1] 140

# Gene x cohort matrix of fold-change categories:
#   NA = cohort has no rows in plot.data
#   0  = log2FC <= 0 (or gene not listed for the cohort)
#   1  = log2FC > 0
#   2  = log2FC > 0.58 and adj.P.Val < 0.05
plot.mat <- matrix(NA, nrow = length(gene.list), ncol = length(TCGA.ALL),
                   dimnames = list(gene.list, TCGA.ALL))
for (i in seq_len(ncol(plot.mat))) {
  sub <- plot.data[which(plot.data$TCGA.Cohort == colnames(plot.mat)[i]), ]
  if (nrow(sub) > 0) {
    plot.mat[, i] <- 0
    # Category 2 is assigned after category 1 so that it overrides it.
    plot.mat[sub$`MUT-up.genes`[which(sub$log2FC > 0)], i] <- 1
    plot.mat[sub$`MUT-up.genes`[which(sub$log2FC > 0.58 & sub$adj.P.Val < 0.05)], i] <- 2
  }
}

# Order cohorts by the number of category-2 genes, then category-1 genes.
# stringsAsFactors = FALSE keeps Cohort as character so that it selects matrix
# columns by name (a factor would select by its integer codes).
stat.cohort <- data.frame(Cohort = colnames(plot.mat),
                          SigCount = colSums(plot.mat == 2),
                          NotSigCount = colSums(plot.mat == 1),
                          stringsAsFactors = FALSE)
stat.cohort <- stat.cohort[order(stat.cohort$SigCount, stat.cohort$NotSigCount, decreasing = TRUE), ]

plot.mat <- plot.mat[, stat.cohort$Cohort]
# Clustering is switched off, so the clustering_* settings below are inert and
# the rows/columns are drawn in the order of plot.mat.
p <- pheatmap(plot.mat, scale = "none",
              color = colorRampPalette(c("#fffef3","#edd1ff","#984ac9"))(501),
              cluster_rows = FALSE, cluster_cols = FALSE, border_color = NA,
              clustering_method = "ward.D2",
              display_numbers = FALSE,
              number_format = "%.f", number_color = "black",
              clustering_distance_rows = "manhattan",
              clustering_distance_cols = "manhattan",
              fontsize_col = 10,
              fontsize_row = 1)
p

6.5 HR for p53 mutations

HR for p53 mutations in 33 TCGA cancer types.

tcga.meta <- read.xlsx("02.tcga/Table5.xlsx", sheet = 1, startRow = 2)

# Keep primary tumours that have survival data and reported mutation calls.
plot.data <- tcga.meta[, 1:10]
plot.data <- plot.data[which(plot.data$With.survival.data == 1 & plot.data$Sample.type == "Primary Tumor" & plot.data$MutationReported == "Yes"), ]

# Number of p53-mutated samples per cohort, largest first. The sheet column is
# named `p53-mut`; the `p53mut` alias does not exist yet at this point, which
# made the original tally come out empty.
table(plot.data$TCGA.cohort[which(plot.data$`p53-mut` == "Yes")]) %>% sort(decreasing = TRUE)

## integer(0)

# select p53 mutated samples > 5
TCGA.OS <- c("TCGA-KICH","TCGA-KIRC","TCGA-ACC","TCGA-LAML","TCGA-UCEC","TCGA-PAAD","TCGA-LIHC","TCGA-BRCA","TCGA-HNSC","TCGA-LUAD","TCGA-KIRP","TCGA-PRAD","TCGA-CESC","TCGA-ESCA","TCGA-COAD","TCGA-BLCA","TCGA-SARC","TCGA-STAD","TCGA-OV","TCGA-MESO","TCGA-READ","TCGA-LUSC","TCGA-SKCM","TCGA-UCS","TCGA-GBM","TCGA-LGG")

# Syntactic aliases for the sheet columns so they can be used in formulas.
plot.data$OS_month_5y <- plot.data$`OS.month.(5.years)`
plot.data$OS_status_5y <- plot.data$`OS.status.(5.years)`
plot.data$p53mut <- plot.data$`p53-mut`

# One Kaplan-Meier plot per cohort (total.list) and, where both p53 groups are
# present, one row of Cox / log-rank results (out.data). Both containers are
# preallocated instead of being grown inside the loop.
total.list <- vector("list", length(TCGA.OS))
cox.rows <- vector("list", length(TCGA.OS))
for (j in seq_along(TCGA.OS)) {
  sub <- plot.data[which(plot.data$TCGA.cohort == TCGA.OS[j]), ]
  if (length(unique(sub$p53mut)) > 1) {
    fit <- coxph(Surv(OS_month_5y, OS_status_5y) ~ p53mut, data = sub)
    fit.info <- summary(fit)
    sub.p <- survdiff(Surv(OS_month_5y, OS_status_5y) ~ p53mut, data = sub)$pvalue
    # conf.int entries 1, 3 and 4 are stored as HR, low.95 and high.95.
    cox.rows[[j]] <- c("p53", TCGA.OS[j], fit.info$conf.int[1],
                       fit.info$conf.int[3], fit.info$conf.int[4], sub.p)
  }
  fit <- survfit(Surv(OS_month_5y, OS_status_5y) ~ p53mut, data = sub)
  total.list[[j]] <- ggsurvplot(fit, data = sub,
                                xlab = 'Time (Months)',
                                pval = TRUE,
                                risk.table = FALSE,
                                risk.table.height = 0.28,
                                conf.int.alpha = 0.05,
                                conf.int = TRUE, surv.median.line = 'hv',
                                palette = c("blue","red"),
                                axes.offset = TRUE,
                                break.time.by = 12,  xlim = c(0, 60),
                                title = TCGA.OS[j])
}
# Character matrix with one row per tested cohort (NULL entries are skipped).
out.data <- do.call(rbind, cox.rows)

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median survival not reached.

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median survival not reached.

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median survival not reached.

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median survival not reached.

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median survival not reached.

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median survival not reached.

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median survival not reached.

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median survival not reached.

names(total.list) <- TCGA.OS

# Turn the character matrix of Cox results into a typed data frame.
out.data <- as.data.frame(out.data)
colnames(out.data) <- c("Gene", "Cohort", "HR", "low.95", "high.95", "P.value")
out.data[3:6] <- lapply(out.data[3:6], function(v) as.numeric(as.character(v)))

# Label each cohort by the direction of a significant (P < 0.05) hazard ratio.
out.data$Prognosis <- "Others"
out.data$Prognosis[with(out.data, which(HR > 1 & P.value < 0.05))] <- "Unfavourable"
out.data$Prognosis[with(out.data, which(HR < 1 & P.value < 0.05))] <- "Favourable"
rownames(out.data) <- NULL
print(out.data)

# Lay out all Kaplan-Meier plots on a 7 x 4 grid.
p <- arrange_ggsurvplots(total.list, ncol = 7, nrow = 4, print = FALSE)
p

6.6 MDM2 expression

Boxplot of MDM2 gene expression in TCGA Pan-Cancer Atlas. Fragments Per Kilobase of transcript per Million mapped reads (FPKM) is used for evaluating gene expression.

tcga.meta <- read.xlsx("02.tcga/Table8.xlsx", sheet = 1, startRow = 2)

# Median MDM2 expression per cohort, ascending; this fixes the x-axis order.
plot.order <- aggregate(tcga.meta$MDM2.expression,
                        by = list(Cohort = tcga.meta$TCGA.cohort),
                        FUN = median)
plot.order <- plot.order[order(plot.order$x), ]
rownames(plot.order) <- NULL
plot.order

# Boxplot per cohort without outlier points, x labels rotated by 90 degrees.
p <- ggboxplot(
  tcga.meta,
  x = "TCGA.cohort", y = "MDM2.expression",
  color = "black", fill = "#b2ebff",
  size = 0.5, width = 0.7,
  xlab = "", ylab = "MDM2 expression",
  order = plot.order$Cohort,
  main = "MDM2",
  outlier.shape = NA,
  outlier.size = 1,
  legend = "bottom"
) +
  theme_base() +
  theme(plot.background = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
p

6.6.1 Radar plot

Radar plots predicting treatment sensitivity of p53 rescue therapy in the indicated cancer types

tcga.rank <- read.xlsx("02.tcga/Table9.xlsx", sheet = 1, startRow = 2)
# Short cohort label without the "TCGA-" prefix.
tcga.rank[["CohortSim"]] <- str_replace_all(tcga.rank[["Cohort"]], "TCGA-", "")

# The eight rank columns that are plotted as radar axes.
idx <- c(
  "Rank.(HR.of.high.expression.of.p53.targets);.data.derived.from.S1K",
  "Rank.(HR.of.high.expression.of.WT-up.genes);.data.derived.from.S1K",
  "Rank.(HR.of.high.expression.of.MUT-up.genes);.data.derived.from.S1K",
  "Rank.(HR.of.p53-mut);.data.derived.from.S1M",
  "Rank.(MDM2.expression);.data.derived.from.S1N",
  "Rank.(Powerfulness.in.regulating.MUT-up.genes);.data.derived.from.S1L",
  "Rank.(Powerfulness.in.regulating.WT-up.genes);.data.derived.from.S1L",
  "Rank.(Powerfulness.in.regulating.p53.targets);.data.derived.from.S1L"
)
# Any requested column that is missing from the sheet is listed here.
setdiff(idx, colnames(tcga.rank))

## character(0)

# One radar chart per row of tcga.rank, drawn on a 7 x 5 panel grid.
# The previous graphics settings are saved and put back afterwards so that
# later plots are not forced into the same grid.
old.par <- par(mfrow = c(7, 5))
n.axis <- length(idx)
for (kkk in seq_len(nrow(tcga.rank))) {
  # radarchart() reads the first row as the axis maximum and the second as the
  # axis minimum; the row to draw follows.
  plot.mat <- rbind(rep(33, n.axis), rep(0, n.axis), tcga.rank[kkk, idx])
  colnames(plot.mat) <- c("HR.targets","HR.WT-up","HR.MUT-up","HR.p53","MDM2","power.MUT-up","power.WT-up","power.targets")
  radarchart(as.data.frame(plot.mat),
             pcol = c( "#dd0000"),
             pfcol = scales::alpha(c("#f28926"),0.5),
             plwd = 2, plty = 1, axistype = 1,
             caxislabels = seq(0, 33, 11), seg = 3,
             cglcol = "grey", cglty = 1, cglwd = 0.8, axislabcol = "black",
             vlcex = 1.1, pangle=c(10, 45, 120) )
  title(paste0(tcga.rank$CohortSim[kkk], " (", tcga.rank$Total.score[kkk], ")") )
}
par(old.par)