--- title: "Clustering of DAT cells" author: "Davide Risso" date: '`r Sys.Date()`' output: html_document: fig_height: 7 fig_width: 7 toc: yes code_folding: hide toc_float: yes editor_options: chunk_output_type: console --- ```{r options, include=FALSE} knitr::opts_chunk$set(cache=TRUE, error=FALSE, message=FALSE, warning=FALSE, resuts = "hide") library(scone) library(stringi) library(RColorBrewer) library(clusterExperiment) library(Rtsne) library(matrixStats) cols <- c(brewer.pal(9, "Set1")) options(getClass.msg=FALSE) #NMF::nmf.options(grid.patch=TRUE) ``` ```{r datain} load("data/Oct15/imageAfterFiltering.RData") dat <- sf.sc.eSet[,sf.sc.eSet$MD_expt_condition == "DAT_EXPT1"] qc <- protocolData(dat)@data[,-c(6:9, 19)] batch <- droplevels(dat$MD_c1_run_id) chip_size <- dat$MD_c1_chip ``` # Filtering low-quality cells ```{r filtering} counts <- na.omit(assayData(dat)$counts_table) counts <- counts[rowSums(counts)>0,] num_reads <- quantile(counts[counts > 0])[4] num_cells <- 0.25*ncol(counts) is_common <- rowSums(counts >= num_reads ) >= num_cells table(is_common) data(housekeeping) hk <- intersect(stri_trans_totitle(housekeeping$V1), rownames(counts)) mfilt <- metric_sample_filter(counts, nreads = qc$NREADS, ralign = qc$RALIGN, hard_nreads = 500000, hard_ralign = 85, gene_filter = is_common, pos_controls = rownames(counts) %in% hk, zcut = 3, mixture = FALSE, plot = TRUE) # Mean log10(x+1) expression mu_obs <- rowMeans(log10(counts[hk,]+1)) # Assumed False Negatives drop_outs <- counts[hk,] == 0 # Logistic Regression Model of Failure ref.glms = list() for (si in 1:dim(drop_outs)[2]){ fit = glm(cbind(drop_outs[,si],1 - drop_outs[,si]) ~ mu_obs, family=binomial(logit)) ref.glms[[si]] = fit$coefficients } # Plot Failure Curves and Calculate AUC plot(NULL, main = "False Negative Rate Curves", ylim = c(0,1),xlim = c(0,6), ylab = "Failure Probability", xlab = "Mean log10 Expression") x = (0:60)/10 AUC = NULL for(si in 1:ncol(counts)){ y = 1/(exp(-ref.glms[[si]][1] - ref.glms[[si]][2] * x) + 1) AUC[si] = sum(y)/10 lines(x, 1/(exp(-ref.glms[[si]][1] - ref.glms[[si]][2] * x) + 1), type = 'l', lwd = 2, col = cols[as.numeric(batch)[si]+1]) } filtersample = !apply(simplify2array(mfilt[!is.na(mfilt)]), 1, any) table(filtersample) #filtered <- counts[,filtersample] filtergenes <- rowSums(counts>10)>=10 table(filtergenes) filtered <- counts[filtergenes,] qc <- qc[colnames(filtered),] pca <- prcomp(t(log1p(filtered)), scale. = TRUE) plot(pca$x[,1:2], pch=19, col=cols[as.numeric(filtersample)+1], main="Raw data") legend("bottomright", c("low-quality", "high-quality"), fill=cols) fq <- FQ_FN(filtered) pca <- prcomp(t(log1p(fq)), scale. = TRUE) plot(pca$x[,1:2], pch=19, col=cols[as.numeric(filtersample)+1], main="FQ data") legend("bottomright", c("low-quality", "high-quality"), fill=cols) qcpca <- prcomp(qc, scale. = TRUE) plot(qcpca$x[,1:2], pch=19, col=cols[as.numeric(filtersample)+1], main="QC PCA") legend("bottomright", c("low-quality", "high-quality"), fill=cols) plot(qc[,c(1, 3)], pch=19, col=cols[as.numeric(filtersample)+1]) legend("bottomright", c("low-quality", "high-quality"), fill=cols) ``` For now, I am removing all the low-quality cells. But I'm wondering if we should revisit this. Could they be biologically different cell types that express fewer genes? This leaves us with `r sum(filtersample)` cells out of `r length(filtersample)`. ```{r filtered} filtered <- counts[,filtersample] filtergenes <- rowSums(filtered>10)>=10 filtered <- filtered[filtergenes,] qc <- qc[colnames(filtered),] batch <- droplevels(batch[filtersample]) chip_size <- droplevels(chip_size[filtersample]) pca <- prcomp(t(log1p(filtered)), scale. = TRUE) plot(pca$x[,1:2], pch=19, col=cols[batch], main="Raw data") fq <- FQ_FN(filtered) pca <- prcomp(t(log1p(fq)), scale. = TRUE) plot(pca$x[,1:2], pch=19, col=cols[batch], main="FQ data") qcpca <- prcomp(qc, scale. = TRUE) plot(qcpca$x[,1:2], pch=19, col=cols[batch], main="QC PCA") plot(qc[,c(1, 3)], pch=19, col=cols[batch], main="QC") ``` # Normalization Here, we use `scone` to normalize the samples. We use housekeeping genes as negative control genes and a collection of positive controls from Poulin and Linnarson. ```{r controls} ## Positive controls: genes found DE between 6 DA neuron subtypes by Poulin et al poulin <- c("Sox6", "Sncg", "Ndnf", "Igf1", "Foxa2", "Lmx1a", "Aldh1a1", "Slc32a1", "Satb1", "Clstn2", "Adcyap1", "Lpl", "Otx2", "Vip", "Chrna4", "Gsg1l", "Snca", "Ntf3") linnarson <- c("Epha4", "Chrna5", "Nrip3", "Kcns3", "Cplx1", "Sox6", "Ndnf", "Kifc3", "Calb1", "Chst8", "Aldh1a1", "Igfbp2", "Lama5", "Anxa1", "Rpb4", "Aldh1a7", "Adcyap1", "Lhfpl2", "Cbln4", "Lpl", "Nhlh2", "Otx1", "Syn2", "Cbln1", "Gpx3", "Fjx1", "Foxa2", "En2", "Ntf3", "Gfra2", "Lix1", "Ptpn5", "Fgf1", "Nostrin", "Serpine2", "Kcnip3", "Grik1", "Lypd1", "Pou3f1", "Cd9", "Otx2", "Neurod6", "Grp", "Tcf12", "Calca", "Gpr83", "Vip", "Cck", "Cnr1", "Nphp1", "Chtf8", "Slc32a1", "Ctxn3", "Etv1", "Lmx1a") poscon <- intersect(c(poulin, linnarson), rownames(fq)) ## Negative controls data(housekeeping) hk <- intersect(rownames(fq), stri_trans_totitle(housekeeping[,1])) plotHeatmap(log1p(fq[poscon,]), main="Positive controls") plotHeatmap(log1p(fq[hk,]), main="Negative controls") ``` ```{r scone} set.seed(474) negcon_ruv <- sample(hk, round(length(hk)/2)) negcon_eval <- setdiff(hk, negcon_ruv) SUM_FN = function (ei) { sums = colSums(ei) eo = t(t(ei)*mean(sums)/sums) return(eo) } scale_funs <- list(none=identity, # Identity - do nothing sum = SUM_FN, # User-defined functions tmm = TMM_FN, # SCONE library wrappers... fq = FQT_FN, fq2 = FQ_FN) scone_obj <- SconeExperiment(filtered, qc=qc, batch=batch, negcon_ruv = rownames(filtered) %in% negcon_ruv, negcon_eval = rownames(filtered) %in% negcon_eval, poscon = rownames(filtered) %in% poscon) scone_res <- scone(scone_obj, zero="postadjust", scaling=scale_funs, adjust_batch="yes", adjust_bio="no", bpparam=BiocParallel::MulticoreParam(7)) ``` ```{r scone_plots} pc_obj <- prcomp(apply(t(get_scores(scone_res)),1,rank), center = TRUE,scale = FALSE) bp_obj <- biplot_color(pc_obj,y = -get_score_ranks(scone_res),expand = .6) points(t(bp_obj[1,]), pch = 1, col = "red", cex = 1) points(t(bp_obj[1,]), pch = 1, col = "red", cex = 1.5) points(t(bp_obj[rownames(bp_obj) == "none,none,no_uv,no_bio,no_batch",]), pch = 1, col = "blue", cex = 1) if(FALSE) { biplot_interactive(scone_res) } head(get_scores(scone_res)) norm1 <- get_normalized(scone_res, "none,fq2,ruv_k=3,no_bio,batch") norm2 <- get_normalized(scone_res, "none,fq,ruv_k=3,no_bio,no_batch") norm3 <- get_normalized(scone_res, "none,fq2,no_uv,no_bio,no_batch") plotHeatmap(log1p(norm1[poscon,]), main="none,fq2,ruv_k=3,no_bio,batch") plotHeatmap(log1p(norm2[poscon,]), main="none,fq,ruv_k=3,no_bio,no_batch") plotHeatmap(log1p(norm3[poscon,]), main="none,fq2,no_uv,no_bio,no_batch") ``` # Clustering ```{r tightcluster} seed <- 927501 rsec <- RSEC(norm3, isCount = TRUE, minSizes = 5, alphas = 0.3, combineProportion = 0.5, mergeMethod = "adjP", mergeCutoff = 0.05, ncores = 7, random.seed = seed, run = TRUE) # cl <- clusterMany(norm3, isCount = TRUE, dimReduce="PCA", nPCADims = 50, alphas = 0.3, # sequential = TRUE, subsample = TRUE, minSizes = 5, # clusterFun = c("hierarchical01", "tight"), # ks = 4:15, random.seed = seed, # ncores = 7) # cl <- combineMany(cl, proportion = .5) # cl <- makeDendrogram(cl, dimReduce = "mad", ndims = 1000) # plotDendrogram(cl) # cl <- mergeClusters(cl, mergeMethod = "adjP", cutoff = 0.05) # plotClusters(cl) plotClusters(rsec) ``` ```{r final} final_tight <- factor(clusterMatrixNamed(rsec)[,2], levels=clusterLegend(rsec)[[2]][, "name"]) names(final_tight) <- colnames(fq) final_merged <- factor(primaryClusterNamed(rsec), levels=clusterLegend(rsec)[[1]][, "name"]) names(final_merged) <- colnames(fq) print(table(final_tight, final_merged)) plotDendrogram(rsec) plotCoClustering(rsec, whichClusters = 1:2) pdf("coclustering.pdf") plotCoClustering(rsec, whichClusters = 1:2) dev.off() ``` # Visualization ```{r visualize} wh_rm <- which(final_tight=="-1") pca <- prcomp(t(log1p(norm3)[,-wh_rm]), scale. = TRUE, center = TRUE) cols1 <- clusterLegend(rsec)[[2]][, "color"] names(cols1) <- clusterLegend(rsec)[[2]][, "name"] cols2 <- clusterLegend(rsec)[[1]][, "color"] names(cols2) <- clusterLegend(rsec)[[1]][, "name"] colFinal <- cols1[final_tight] colMerged <- cols2[final_merged] plot(pca$x, pch=19, col=colFinal[-wh_rm], main="PCA") legend("topright", levels(final_tight), fill=cols1[levels(final_tight)]) plot(pca$x, pch=19, col=colMerged[-wh_rm], main="PCA, merged") legend("topright", levels(final_merged), fill=cols2[levels(final_merged)]) vars <- rowVars(log1p(norm3)) names(vars) <- rownames(norm3) vars <- sort(vars, decreasing = TRUE) tsne_500 <- Rtsne(t(log1p(norm3)[names(vars)[1:500],-wh_rm]), max_iter=5000, perplexity = 10) plot(tsne_500$Y, pch=19, col=colFinal[-wh_rm], main="t-SNE, 500 most variable genes") plot(tsne_500$Y, pch=19, col=colMerged[-wh_rm], main="t-SNE, 500 most variable genes, merged") tsne_250 <- Rtsne(t(log1p(norm3)[names(vars)[1:250],-wh_rm]), max_iter=5000, perplexity = 10) plot(tsne_250$Y, pch=19, col=colFinal[-wh_rm], main="t-SNE, 250 most variable genes") plot(tsne_250$Y, pch=19, col=colMerged[-wh_rm], main="t-SNE, 250 most variable genes, merged") tsne_1000 <- Rtsne(t(log1p(norm3)[names(vars)[1:1000],-wh_rm]), max_iter=5000, perplexity = 10) plot(tsne_1000$Y, pch=19, col=colFinal[-wh_rm], main="t-SNE, 1,000 most variable genes") plot(tsne_1000$Y, pch=19, col=colMerged[-wh_rm], main="t-SNE, 1,000 most variable genes, merged") tsne_all <- Rtsne(t(log1p(norm3)[,-wh_rm]), max_iter=5000, perplexity = 10) plot(tsne_all$Y, pch=19, col=colFinal[-wh_rm], main="t-SNE, All genes") plot(tsne_all$Y, pch=19, col=colMerged[-wh_rm], main="t-SNE, All genes, merged") plotHeatmap(rsec, whichClusters=1:2, clusterFeaturesData=poscon) pdf("tsne_500.pdf") plot(tsne_500$Y, pch=19, col=colFinal[-wh_rm], main="t-SNE, 500 most variable genes") dev.off() pdf("tsne_250.pdf") plot(tsne_250$Y, pch=19, col=colFinal[-wh_rm], main="t-SNE, 250 most variable genes") dev.off() pdf("tsne_1000.pdf") plot(tsne_1000$Y, pch=19, col=colFinal[-wh_rm], main="t-SNE, 1,000 most variable genes") dev.off() pdf("tsne_all.pdf") plot(tsne_all$Y, pch=19, col=colFinal[-wh_rm], main="t-SNE, All genes") dev.off() pdf("positive_controls.pdf") plotHeatmap(rsec, whichClusters=1:2, clusterFeaturesData=poscon) dev.off() ``` ```{r zinb} run_zinb <- FALSE if(run_zinb) { library(zinbwave) library(BiocParallel) library(doParallel) registerDoParallel(6) register(DoparParam()) system.time(zinb <- zinbFit(filtered[,-wh_rm], K=10)) save(zinb, file = "zinb_res.rda") } else { load("zinb_res.rda") } plot(zinb@W, pch=19, col=colFinal[-wh_rm], main="ZINB") tsne_zinb <- Rtsne(zinb@W, pca = FALSE, max_iter=5000, perplexity = 20) plot(tsne_zinb$Y, pch=19, col=colFinal[-wh_rm], main="t-SNE, after ZINB", xlab = "Dimension 1", ylab = "Dimension 2") pdf("tsne_zinb_all.pdf") plot(tsne_zinb$Y, pch=19, col=colFinal[-wh_rm], main="t-SNE, after ZINB", xlab = "Dimension 1", ylab = "Dimension 2") dev.off() ``` ```{r alt, eval=FALSE} library(Seurat) pbmc <- CreateSeuratObject(raw.data = filtered, min.cells = 1, min.genes = 1, project = "10X_PBMC") ## Build SNN k.param = 10 k.scale = 10 data.use <- getW(zinb) n.cells = NROW(data.use) my.knn <- FNN::get.knn(as.matrix(data.use), k = min(k.scale * k.param, n.cells - 1)) nn.ranked <- cbind(1:n.cells, my.knn$nn.index[, 1:(k.param-1)]) nn.large <- my.knn$nn.index w <- Seurat:::CalcSNNSparse(cell.names = colnames(filtered[,-wh_rm]), k.param = k.param, nn.large = nn.large, nn.ranked = nn.ranked, prune.SNN = 1/15, print.output = FALSE) pbmc@snn <- w ## Run modularity clustering resolution <- 0.4 pbmc <- Seurat:::RunModularityClustering(object = pbmc, SNN = w, modularity = 1, resolution = resolution, algorithm = 1, n.start = 100, n.iter = 10, random.seed = 0, print.output = FALSE, temp.file.location = NULL) pbmc <- Seurat:::GroupSingletons(pbmc, pbmc@snn) name <- paste("res.", resolution, sep = "") pbmc <- StashIdent(pbmc, name) z_cl <- pbmc@ident ``` # Marker genes ```{r heatmap} primaryClusterIndex(rsec) <- 2 genes <- getBestFeatures(rsec, contrastType = "Pairs", isCount=TRUE) head(genes) plotHeatmap(rsec, clusterSamplesData = "primaryCluster", clusterFeaturesData=unique(genes[,"IndexInOriginal"])) plotHeatmap(rsec, clusterFeaturesData=unique(genes[,"IndexInOriginal"])) pdf("heatmap.pdf") plotHeatmap(rsec, clusterSamplesData = "primaryCluster", clusterFeaturesData=unique(genes[,"IndexInOriginal"])) plotHeatmap(rsec, clusterFeaturesData=unique(genes[,"IndexInOriginal"])) dev.off() genes_1vall <- getBestFeatures(rsec, contrastType = "OneAgainstAll", isCount=TRUE, number=50) plotHeatmap(rsec, clusterSamplesData = "primaryCluster", clusterFeaturesData=unique(genes_1vall[,"IndexInOriginal"])) plotHeatmap(rsec, clusterFeaturesData=unique(genes_1vall[,"IndexInOriginal"])) pdf("heatmap_1vall.pdf") plotHeatmap(rsec, clusterSamplesData = "primaryCluster", clusterFeaturesData=unique(genes_1vall[,"IndexInOriginal"])) plotHeatmap(rsec, clusterFeaturesData=unique(genes_1vall[,"IndexInOriginal"])) dev.off() genes_dendro <- getBestFeatures(rsec, contrastType = "Dendro", isCount=TRUE, number=50) plotHeatmap(rsec, clusterSamplesData = "primaryCluster", clusterFeaturesData=unique(genes_dendro[,"IndexInOriginal"])) plotHeatmap(rsec, clusterFeaturesData=unique(genes_dendro[,"IndexInOriginal"])) pdf("heatmap_dendro.pdf") plotHeatmap(rsec, clusterSamplesData = "primaryCluster", clusterFeaturesData=unique(genes_dendro[,"IndexInOriginal"])) plotHeatmap(rsec, clusterFeaturesData=unique(genes_dendro[,"IndexInOriginal"])) dev.off() ``` ```{r save_markers} write.table(genes, file="dat_cluster_markers.txt", sep='\t', quote=FALSE) ``` ```{r markers} markers <- list(Set1=c("Erbb4", "Pcsk1", "Pepd", "Pdp2", "Nubp2", "Pik3c3", "Cacna2d3", "Kcns3", "Chrna3", "Epha4"), Set2=c("Adamts2", "Arpp21", "Rasgrp1", "Pitx2", "Irx6", "Cd36", "Pgr15l", "Gda", "Nos1", "Tac1"), Set3=c("Gad2", "Rasgrf1", "Satb2", "Fam159b", "Otof", "Msn", "Six6", "Gad1", "Ecel1", "Calcr"), Set4=c("Cnr1", "Cbln4", "Lpl", "Gpr83", "Neurod6", "Gkn1", "Igfbp4", "Prdm8", "Nxph3", "Tiam2") ) markers <-lapply(markers, function(x) which(rownames(rsec) %in% x)) blank_data <- makeBlankData(transform(rsec), markers) dat <- as.matrix(blank_data$dataWBlanks) rnames <- blank_data$rowNamesWBlanks rownames(dat) <- rnames breaks <- unique(c(min(dat, na.rm = TRUE), seq(0, quantile(dat[dat > 0], .99, na.rm = TRUE), length = 50), max(dat, na.rm = TRUE))) NMF::aheatmap(dat[,order(final_tight)], Rowv=NA, Colv=NA, color = seqPal5, annCol = data.frame(Clusters=sort(final_tight)), annColors = list(Clusters=cols1), breaks=breaks, main="Cluster markers") pdf("markers.pdf") NMF::aheatmap(dat[,order(final_tight)], Rowv=NA, Colv=NA, color = seqPal5, annCol = data.frame(Clusters=sort(final_tight)), annColors = list(Clusters=cols1), breaks=breaks, main="Cluster markers") dev.off() linnarson <- list(SNC_VTA1=c("Epha4", "Chrna5", "Nrip3", "Kcns3", "Cplx1", "Sox6", "Ndnf"), Not_SNC=c("Kifc3", "Calb1", "Chst8"), SNC_VTA2=c("Aldh1a1", "Igfbp2", "Lama5", "Anxa1", "Rpb4", "Aldh1a7"), VTA2_VTA3=c("Adcyap1", "Lhfpl2", "Cbln4", "Lpl", "Nhlh2", "Otx1"), VTA3_VTA4=c("Syn2", "Cbln1", "Gpx3"), VTA1_VTA3=c("Fjx1", "Foxa2", "En2", "Ntf3", "Gfra2"), SNC=c("Lix1", "Ptpn5", "Fgf1", "Nostrin", "Serpine2", "Kcnip3", "Grik1"), VTA1=c("Lypd1", "Pou3f1", "Cd9"), VTA2=c("Otx2", "Neurod6", "Grp", "Tcf12", "Calca", "Gpr83"), VTA3=c("Vip", "Cck", "Cnr1", "Nphp1", "Chtf8"), VTA4=c("Slc32a1", "Ctxn3", "Etv1", "Lmx1a")) markers <-lapply(linnarson, function(x) which(rownames(rsec) %in% x)) blank_data <- makeBlankData(transform(rsec), markers) dat <- as.matrix(blank_data$dataWBlanks) rnames <- blank_data$rowNamesWBlanks rownames(dat) <- rnames breaks <- unique(c(min(dat, na.rm = TRUE), seq(0, quantile(dat[dat > 0], .99, na.rm = TRUE), length = 50), max(dat, na.rm = TRUE))) NMF::aheatmap(dat[,order(final_tight)], Rowv=NA, Colv=NA, color = seqPal5, annCol = data.frame(Clusters=sort(final_tight)), annColors = list(Clusters=cols1), breaks=breaks, main="Linnarson markers") ``` From the last heatmap, we can see that there is correspondence between our clusters and Linnarson's clusters. ```{r markers2} markers <- c("Neurod6", "Grp", "Cnr1", "Gpr83", "Tcf12", "Gkn1", "Igfbp4", "Otx2", "Nxph3", "Calca") table(primaryClusterNamed(rsec)) idx <- order(tapply(transform(rsec)["Neurod6",], primaryClusterNamed(rsec), mean), decreasing = TRUE) cl <- factor(primaryClusterNamed(rsec), levels=levels(as.factor(primaryClusterNamed(rsec)))[idx]) orderSamples(rsec) <- order(transform(rsec)["Neurod6",], decreasing = TRUE) plotHeatmap(rsec, clusterFeaturesData=markers, clusterSamplesData = "orderSamplesValue") pdf("markers_Neurod6.pdf") plotHeatmap(rsec, clusterFeaturesData=markers, clusterSamplesData = "orderSamplesValue") dev.off() pdf("markers_opt1.pdf") plotHeatmap(rsec, clusterFeaturesData=markers, clusterFeatures = FALSE, clusterSamplesData = "orderSamplesValue") dev.off() pdf("markers_opt3.pdf") plotHeatmap(rsec, clusterFeaturesData=markers, clusterFeatures = FALSE, clusterSamplesData = "dendrogramValue") dev.off() idx <- order(tapply(transform(rsec)["Grp",], primaryClusterNamed(rsec), mean), decreasing = TRUE) cl <- factor(primaryClusterNamed(rsec), levels=levels(as.factor(primaryClusterNamed(rsec)))[idx]) orderSamples(rsec) <- order(transform(rsec)["Grp",], decreasing = TRUE) markers <- c("Grp", "Neurod6", "Cnr1", "Gpr83", "Tcf12", "Gkn1", "Igfbp4", "Otx2", "Nxph3", "Calca") pdf("markers_opt2.pdf") plotHeatmap(rsec, clusterFeaturesData=markers, clusterFeatures = FALSE, clusterSamplesData = "orderSamplesValue") dev.off() pdf("markers_opt4.pdf") plotHeatmap(rsec, clusterFeaturesData=markers, clusterFeatures = FALSE, clusterSamplesData = "dendrogramValue") dev.off() ``` ## Sex ```{r sex} dat <- sf.sc.eSet[,sf.sc.eSet$MD_expt_condition == "DAT_EXPT1"] sex <- dat$MD_sex names(sex) <- colnames(dat) sex <- sex[colnames(rsec)] colData(rsec)$sex <- sex age <- dat$MD_age names(age) <- colnames(dat) age <- age[colnames(rsec)] colData(rsec)$age <- age pdf("markers_opt4_sex.pdf") plotHeatmap(rsec, clusterFeaturesData=markers, clusterFeatures = FALSE, clusterSamplesData = "dendrogramValue", sampleData = "sex") dev.off() rsec@merge_index <- NA_integer_ rsec@merge_cutoff <- NA_integer_ rsec@merge_dendrocluster_index <- NA_integer_ rsec@merge_nodeMerge <- NULL rsec@merge_nodeProp <- NULL rsec@merge_method <- NA_character_ rsec@dendro_outbranch <- TRUE plotDendrogram(rsec) pdf("coclustering_sex.pdf") plotCoClustering(rsec, whichClusters = "combineMany", sampleData = c("sex")) dev.off() pdf("coclustering_sex_age.pdf") plotCoClustering(rsec, whichClusters = "combineMany", sampleData = c("sex", "age")) dev.off() ``` ```{r de} x <- as.factor(sex) design <- model.matrix(~x) colnames(design) <- c("Intercept", "male") counts <- round(assay(rsec)) library(edgeR) y <- DGEList(counts) y <- calcNormFactors(y) y <- estimateDisp(y, design) fit <- glmFit(y, design = design) lrt <- glmLRT(fit, coef = 2) top <- topTags(lrt, n = Inf)$table ``` ```{r de_visual} head(top) table(top$FDR<=0.05) hist(top$PValue) de <- rownames(top)[top$FDR <= 0.05] idx <- c("Neurod6", "Grp") idx %in% de top[idx,] pdf("volcano_de_genes_sex.pdf") plot(top$logFC, -log10(top$PValue), col="grey", pch=19, xlab="LogFC (M vs F)", ylab = "-log10(PValue)") points(top[idx, "logFC"], -log10(top[idx, "PValue"]), col=2, lwd=2) text(top[idx[1], "logFC"] + .3, -log10(top[idx[1], "PValue"]) + 3, labels = idx[1]) text(top[idx[2], "logFC"] - .3, -log10(top[idx[2], "PValue"]) + 3, labels = idx[2]) text(top[de[1:4], "logFC"] - 1, -log10(top[de[1:4], "PValue"]), labels = de[1:4]) dev.off() pdf("heatmap_de_genes_sex.pdf") plotHeatmap(rsec, clusterSamplesData = "hclust", sampleData = c("sex"), clusterFeaturesData=which(rownames(rsec) %in% de), breaks=.99) dev.off() pdf("heatmap_sex.pdf") plotHeatmap(rsec, clusterSamplesData = "primaryCluster", clusterFeaturesData=unique(genes[,"IndexInOriginal"]), sampleData = "sex") dev.off() ``` ```{r info} sessionInfo() ```