Chapter 4 Deconvolution

4.1 SVR

The expample data of APL bulk RNA-seq can be downloaded via APL bulk demo data

The APL cell signature matrix can be downloaded via APL cell signature

The regression model can be downloaded via The regression model

deconvolution <- function(exp.mat, sig.mat, ncores) {
  
  merge.mat <- list(y = as.matrix(exp.mat[rownames(sig.mat), ]), x = as.matrix(sig.mat) )
  nu <- c(0.25, 0.5, 0.75, 1)

  res <- parallel::mclapply(seq_len(ncol(merge.mat$y)), function(z) {
    lapply(nu, function(i) {
      suppressMessages(
        suppressWarnings(
          svm(x = merge.mat$x,
              y = as.vector(merge.mat$y[,z]), 
              nu = i, 
              type = 'nu-regression',
              scale = FALSE, 
              kernel = 'linear')
        ))
    })
  }, mc.cores = ncores)
  
  rmse <-  lapply(res, function(z) { 
    lapply(z, function(i){
      sqrt(sum(i$residuals ^ 2)/length(i$residuals))
    })
  })
  rmse <- unlist(lapply(rmse, function(z) which.min(unlist(z))))
  # extract models with min rmse
  mod <- lapply(seq_along(res), function(z) {res[[z]][[rmse[z]]] })
  
  # return coefficients
  coefs <- lapply(mod, function(z) {as.numeric(t(z$coefs) %*% z$SV)})
  coefs <- as.data.frame(t(data.frame(coefs)))
  rownames(coefs) <- colnames(merge.mat$y)
  colnames(coefs) <- colnames(merge.mat$x)
  
  return(coefs)
}


## Read expression data
exp.tpm <- readRDS("data/test_sample.deconvolution.TPM.rds")

## Read APL cell signature 
apl.signature <- read.table("data/CIBERSORTx_sig_1w_branch_mix_out.txt", header = T, row.names = 1)

exp.tpm %>% dim()

## [1] 59059     4

exp.tpm %>% head()

decon.bulk <- deconvolution(exp.tpm, apl.signature, 2)

decon.bulk

4.2 Regression

# Read model
linear.model <- readRDS("data/PredModel.rds")

branch.name <- c("Stem_like","CMP_like","S100_GMP_like","GMP_like","Cycling_GMP_like","MDP_like","Bcell","Tcell","Ery_HBB")
pred.score <- NULL
for (i in 1:length(branch.name)) {
  exp = as.matrix(decon.bulk[, i])
  
  lm.model <- linear.model[[i]]
  pred.obv <-  as.numeric(lm.model$coefficients[1]) + 
    as.numeric(lm.model$coefficients[2]) * as.numeric(decon.bulk[, i])
  pred.score <- cbind(pred.score, pred.obv)
}
rownames(pred.score) <- rownames(decon.bulk)
colnames(pred.score) <- branch.name

# Coefficeint matrix
pred.score

##             Stem_like  CMP_like S100_GMP_like GMP_like Cycling_GMP_like  MDP_like    Bcell    Tcell   Ery_HBB
## Sample1_D0  2.0158598  6.990032      22.47653 56.60889         12.89132 0.2585436 1.863082 5.170869 0.7296987
## Sample1_D2 -0.2064932  8.106715      19.15908 51.55584         15.45176 1.0538323 2.035280 3.350186 0.9362890
## Sample2_D0  1.9453732  9.024004      23.14179 50.25093         14.03999 0.4811723 1.959813 5.061971 0.6398924
## Sample2_D2 -0.7605606 11.299307      20.08907 42.76156         13.56451 1.9169828 1.926964 4.476400 0.6773634

pred.percent <- pred.score
for (i in 1:nrow(pred.percent)) {
  if (sum(pred.percent[i, ] < 0) > 0) {
    pred.percent[i, ] <- pred.percent[i, ] - min(pred.percent[i, ]) 
    pred.percent[i, ] <- pred.percent[i, ] / sum(pred.percent[i, ]) * 100
  } else {
    pred.percent[i, ] <- pred.percent[i, ] / sum(pred.percent[i, ]) * 100
  }
}

# Proportion estimation
pred.percent

##            Stem_like  CMP_like S100_GMP_like GMP_like Cycling_GMP_like  MDP_like    Bcell    Tcell   Ery_HBB
## Sample1_D0  1.849331  6.412590      20.61976 51.93246         11.82638 0.2371855 1.709173 4.743706 0.6694187
## Sample1_D2  0.000000  8.047565      18.74675 50.10830         15.15790 1.2200524 2.170139 3.443028 1.1062652
## Sample2_D0  1.825871  8.469670      21.72022 47.16407         13.17753 0.4516144 1.839423 4.751019 0.6005846
## Sample2_D2  0.000000 11.731772      20.28240 42.33808         13.93535 2.6046993 2.614409 5.094486 1.3988044