Chapter 4 Deconvolution
4.1 SVR
The expample data of APL bulk RNA-seq can be downloaded via APL bulk demo data
The APL cell signature matrix can be downloaded via APL cell signature
The regression model can be downloaded via The regression model
deconvolution <- function(exp.mat, sig.mat, ncores) {
merge.mat <- list(y = as.matrix(exp.mat[rownames(sig.mat), ]), x = as.matrix(sig.mat) )
nu <- c(0.25, 0.5, 0.75, 1)
res <- parallel::mclapply(seq_len(ncol(merge.mat$y)), function(z) {
lapply(nu, function(i) {
suppressMessages(
suppressWarnings(
svm(x = merge.mat$x,
y = as.vector(merge.mat$y[,z]),
nu = i,
type = 'nu-regression',
scale = FALSE,
kernel = 'linear')
))
})
}, mc.cores = ncores)
rmse <- lapply(res, function(z) {
lapply(z, function(i){
sqrt(sum(i$residuals ^ 2)/length(i$residuals))
})
})
rmse <- unlist(lapply(rmse, function(z) which.min(unlist(z))))
# extract models with min rmse
mod <- lapply(seq_along(res), function(z) {res[[z]][[rmse[z]]] })
# return coefficients
coefs <- lapply(mod, function(z) {as.numeric(t(z$coefs) %*% z$SV)})
coefs <- as.data.frame(t(data.frame(coefs)))
rownames(coefs) <- colnames(merge.mat$y)
colnames(coefs) <- colnames(merge.mat$x)
return(coefs)
}
## Read expression data
exp.tpm <- readRDS("data/test_sample.deconvolution.TPM.rds")
## Read APL cell signature
apl.signature <- read.table("data/CIBERSORTx_sig_1w_branch_mix_out.txt", header = T, row.names = 1)
exp.tpm %>% dim()
## [1] 59059 4
4.2 Regression
# Read model
linear.model <- readRDS("data/PredModel.rds")
branch.name <- c("Stem_like","CMP_like","S100_GMP_like","GMP_like","Cycling_GMP_like","MDP_like","Bcell","Tcell","Ery_HBB")
pred.score <- NULL
for (i in 1:length(branch.name)) {
exp = as.matrix(decon.bulk[, i])
lm.model <- linear.model[[i]]
pred.obv <- as.numeric(lm.model$coefficients[1]) +
as.numeric(lm.model$coefficients[2]) * as.numeric(decon.bulk[, i])
pred.score <- cbind(pred.score, pred.obv)
}
rownames(pred.score) <- rownames(decon.bulk)
colnames(pred.score) <- branch.name
# Coefficeint matrix
pred.score
## Stem_like CMP_like S100_GMP_like GMP_like Cycling_GMP_like MDP_like Bcell Tcell Ery_HBB
## Sample1_D0 2.0158598 6.990032 22.47653 56.60889 12.89132 0.2585436 1.863082 5.170869 0.7296987
## Sample1_D2 -0.2064932 8.106715 19.15908 51.55584 15.45176 1.0538323 2.035280 3.350186 0.9362890
## Sample2_D0 1.9453732 9.024004 23.14179 50.25093 14.03999 0.4811723 1.959813 5.061971 0.6398924
## Sample2_D2 -0.7605606 11.299307 20.08907 42.76156 13.56451 1.9169828 1.926964 4.476400 0.6773634
pred.percent <- pred.score
for (i in 1:nrow(pred.percent)) {
if (sum(pred.percent[i, ] < 0) > 0) {
pred.percent[i, ] <- pred.percent[i, ] - min(pred.percent[i, ])
pred.percent[i, ] <- pred.percent[i, ] / sum(pred.percent[i, ]) * 100
} else {
pred.percent[i, ] <- pred.percent[i, ] / sum(pred.percent[i, ]) * 100
}
}
# Proportion estimation
pred.percent
## Stem_like CMP_like S100_GMP_like GMP_like Cycling_GMP_like MDP_like Bcell Tcell Ery_HBB
## Sample1_D0 1.849331 6.412590 20.61976 51.93246 11.82638 0.2371855 1.709173 4.743706 0.6694187
## Sample1_D2 0.000000 8.047565 18.74675 50.10830 15.15790 1.2200524 2.170139 3.443028 1.1062652
## Sample2_D0 1.825871 8.469670 21.72022 47.16407 13.17753 0.4516144 1.839423 4.751019 0.6005846
## Sample2_D2 0.000000 11.731772 20.28240 42.33808 13.93535 2.6046993 2.614409 5.094486 1.3988044