library(tidyverse)
## Load the normalized expression table.
## NOTE(review): no `delim` is given, so readr has to guess the delimiter —
## confirm the file is parsed into the expected columns.
raw <- read_delim("./data/GSE231706_Normalization.txt")

## Clean the downloaded data: keep the gene symbol plus the SCR and KD sample
## columns, keep only the first row for each gene symbol, and move the symbol
## into the row names.
df <- raw %>%
  dplyr::select(`Gene Symbol`, contains("SCR"), contains("KD")) %>%
  dplyr::rename(Gene = `Gene Symbol`) %>%
  # Row names cannot be NA, so rows without a gene symbol are dropped here
  # instead of aborting the script inside column_to_rownames().
  dplyr::filter(!is.na(Gene)) %>%
  dplyr::distinct(Gene, .keep_all = TRUE) %>%
  column_to_rownames("Gene")

## Relabel the samples. The select() above puts the SCR columns first and the
## KD columns second, so SCR columns receive the WT labels and KD columns the
## KO labels.
## NOTE(review): assumes exactly three SCR columns followed by three KD
## columns; only the total count is checked below.
sample_ids <- c("WT1", "WT2", "WT3", "KO1", "KO2", "KO3")
if (ncol(df) != length(sample_ids)) {
  # Assigning fewer names than columns would silently create NA column names.
  stop(
    "Expected ", length(sample_ids), " sample columns (SCR/KD) but found ",
    ncol(df), ".",
    call. = FALSE
  )
}
colnames(df) <- sample_ids
head(df)
# Remove genes whose expression is too low.
# The values in df are back-transformed with 2^x (i.e. treated as log2-scale)
# and summed per gene across all samples; a gene is kept only when that
# linear-scale sum exceeds the threshold.
# NOTE(review): the threshold 6 equals the number of sample columns, so this
# presumably removes genes whose log2 value is ~0 in every sample — confirm
# against how the upstream normalization was produced.
min_linear_sum <- 6
linear_sum <- rowSums(2^df)
# Genes whose sum is NA are dropped as well (same outcome as dplyr::filter()).
keep <- !is.na(linear_sum) & linear_sum > min_linear_sum
# Subset the original log2 values directly: this avoids a log2(2^x)
# floating-point round trip and keeps the gene symbols as row names.
df <- df[keep, , drop = FALSE]
# Example of the first rows of df (log2 scale):
#            WT1     WT2     WT3     KO1     KO2     KO3
# A4GALT 1.48479 1.64620 1.50918 1.76116 1.68286 1.60380
# AAAS   5.54652 5.57043 5.37821 5.37711 5.41979 5.42553
# AACS   5.00831 5.05298 4.78734 4.76894 4.87734 4.67790
# AADAT  2.87019 2.65707 2.91066 2.80236 3.02523 2.87990
# AAED1  3.43895 3.44727 3.39725 3.38053 3.30732 3.32487
# AAGAB  5.27395 5.24923 5.18787 5.11202 5.20488 5.11748
saveRDS(df, "./data/20231113_RNA_log2.rds")
## Prepare the sample information: one row per expression column of df, with
## the sample ID (also used as the row name) and its group label.
meta <- data.frame(
  ID = colnames(df),
  Type = rep(c("WT", "KO"), each = 3),
  row.names = colnames(df)
)
head(meta)
# Expected printout:
#      ID Type
# WT1 WT1   WT
# WT2 WT2   WT
# WT3 WT3   WT
# KO1 KO1   KO
# KO2 KO2   KO
# KO3 KO3   KO
saveRDS(meta, "./data/20231113_META.rds")