Introduction
This analysis compares the “Top 20” FLASH fit to the “Zero” FLASH fit (which does not include any canonical loadings). See here for fitting details. See here and here for introductions to the plots. Because the first data-driven loading in the “Zero” fit generally acts as a surrogate for the “equal effects” loading in the “Top 20” fit, I combine the two loadings in the plots below for ease of interpretation.
library(mashr)
Loading required package: ashr
devtools::load_all("/Users/willwerscheid/GitHub/flashr/")
Loading flashr
gtex <- readRDS(gzcon(url("https://github.com/stephenslab/gtexresults/blob/master/data/MatrixEQTLSumStats.Portable.Z.rds?raw=TRUE")))
strong <- t(gtex$strong.z)
fpath <- "./output/MASHvFLASHgtex2/"
top20_final <- readRDS(paste0(fpath, "top20.rds"))
zero_final <- readRDS(paste0(fpath, "zero.rds"))
all_fl_lfsr <- readRDS(paste0(fpath, "fllfsr.rds"))
top20_lfsr <- all_fl_lfsr[[4]]
zero_lfsr <- all_fl_lfsr[[5]]
top20_pm <- flash_get_fitted_values(top20_final)
zero_pm <- flash_get_fitted_values(zero_final)
missing.tissues <- c(7, 8, 19, 20, 24, 25, 31, 34, 37)
gtex.colors <- read.table("https://github.com/stephenslab/gtexresults/blob/master/data/GTExColors.txt?raw=TRUE", sep = '\t', comment.char = '')[-missing.tissues, 2]
OHF.colors <- c("tan4", "tan3")
zero.colors <- c("black", gray.colors(19, 0.2, 0.9),
gray.colors(17, 0.95, 1))
plot_test <- function(n, lfsr, pm, method_name) {
plot(strong[, n], pch=1, col="black", xlab="", ylab="", cex=0.6,
ylim=c(min(c(strong[, n], 0)), max(c(strong[, n], 0))),
main=paste0("Test #", n, "; ", method_name))
size = rep(0.6, 44)
shape = rep(15, 44)
signif <- lfsr[, n] <= .05
shape[signif] <- 17
size[signif] <- 1.35 - 15 * lfsr[signif, n]
size <- pmin(size, 1.2)
points(pm[, n], pch=shape, col=as.character(gtex.colors), cex=size)
abline(0, 0)
}
plot_ohf_v_ohl_loadings <- function(n, ohf_fit, ohl_fit, ohl_name,
legend_pos = "bottomright") {
ohf <- abs(ohf_fit$EF[n, ] * apply(abs(ohf_fit$EL), 2, max))
ohl <- -abs(ohl_fit$EF[n, ] * apply(abs(ohl_fit$EL), 2, max))
data <- rbind(c(ohf, rep(0, length(ohl) - 45)),
c(ohl[1:45], rep(0, length(ohf) - 45),
ohl[46:length(ohl)]))
colors <- c("black",
as.character(gtex.colors),
OHF.colors,
zero.colors[1:(length(ohl) - 45)])
x <- barplot(data, beside=T, col=rep(colors, each=2),
main=paste0("Test #", n, " loadings"),
legend.text = c("OHF", ohl_name),
args.legend = list(x = legend_pos, bty = "n", pch="+-",
fill=NULL, border="white"))
text(x[2*(46:47) - 1], min(data) / 10,
labels=as.character(1:2), cex=0.4)
text(x[2*(48:ncol(data))], max(data) / 10,
labels=as.character(1:(length(ohl) - 45)), cex=0.4)
}
plot_ohl_v_zero_loadings <- function(n, ohl_fit, zero_fit, ohl_name,
legend_pos = "topright") {
ohl <- abs(ohl_fit$EF[n, ] * apply(abs(ohl_fit$EL), 2, max))
# Combine equal effects and first data-driven loading
ohl[1] <- ohl[1] + ohl[46]
ohl <- ohl[-46]
zero <- -abs(zero_fit$EF[n, ] * apply(abs(zero_fit$EL), 2, max))
data <- rbind(c(ohl, rep(0, length(zero) - length(ohl) + 44)),
c(zero[1], rep(0, 44), zero[2:length(zero)]))
colors <- c("black", as.character(gtex.colors), zero.colors)
x <- barplot(data, beside=T, col=rep(colors, each=2),
main=paste0("Test #", n, " loadings"),
legend.text = c(ohl_name, "Zero"),
args.legend = list(x = legend_pos, bty = "n", pch="+-",
fill=NULL, border="white"))
text(x[2*(seq(46, ncol(data), by=2)) - 1], min(data) / 10,
labels=as.character(seq(2, length(zero), by=2)), cex=0.4)
}
compare_methods <- function(lfsr1, lfsr2, pm1, pm2) {
res <- list()
res$first_not_second <- find_A_not_B(lfsr1, lfsr2)
res$lg_first_not_second <- find_large_A_not_B(lfsr1, lfsr2)
res$second_not_first <- find_A_not_B(lfsr2, lfsr1)
res$lg_second_not_first <- find_large_A_not_B(lfsr2, lfsr1)
res$diff_pms <- find_overall_pm_diff(pm1, pm2)
return(res)
}
# Find tests where many conditions are significant according to
# method A but not according to method B.
find_A_not_B <- function(lfsrA, lfsrB) {
select_tests(colSums(lfsrA <= 0.05 & lfsrB > 0.05))
}
# Find tests where many conditions are highly significant according to
# method A but are not significant according to method B.
find_large_A_not_B <- function(lfsrA, lfsrB) {
select_tests(colSums(lfsrA <= 0.01 & lfsrB > 0.05))
}
find_overall_pm_diff <- function(pmA, pmB, n = 4) {
pm_diff <- colSums((pmA - pmB)^2)
return(order(pm_diff, decreasing = TRUE)[1:4])
}
# Get at least four (or min_n) "top" tests.
select_tests <- function(colsums, min_n = 4) {
n <- 45
n_tests <- 0
while (n_tests < min_n && n > 0) {
n <- n - 1
n_tests <- sum(colsums >= n)
}
return(which(colsums >= n))
}
plot_it <- function(n, legend.pos = "topright") {
par(mfrow=c(1, 2))
plot_test(n, top20_lfsr, top20_pm, "Top 20")
plot_test(n, zero_lfsr, zero_pm, "Zero")
par(mfrow=c(1, 1))
plot_ohl_v_zero_loadings(n, top20_final, zero_final, "Top 20",
legend.pos)
}
Different posterior means
Each of the following examples illustrates how the additional canonical loadings can create large differences in posterior means (the extra data-driven loadings are unimportant in each case). It is difficult to generalize any further: sometimes the result is a small equal-effects loading (#617); sometimes, the data-driven loadings become unimportant, so that the comparatively smaller effects are aggressively shrunken towards their mean (#10904, #10581). I think that these examples point up one of the principal weaknesses of the “Zero” fit, which is that without the canonical loadings, many of the results become difficult to interpret (and therefore less plausible).
diff.pms <- c(617, 10904, 10581)
for (n in diff.pms) {
plot_it(n)
}
Expand here to see past versions of diff_pms-1.png:
Version
|
Author
|
Date
|
1d57c08
|
Jason Willwerscheid
|
2018-08-04
|
Expand here to see past versions of diff_pms-2.png:
Version
|
Author
|
Date
|
1d57c08
|
Jason Willwerscheid
|
2018-08-04
|
Expand here to see past versions of diff_pms-3.png:
Version
|
Author
|
Date
|
1d57c08
|
Jason Willwerscheid
|
2018-08-04
|
Expand here to see past versions of diff_pms-4.png:
Version
|
Author
|
Date
|
1d57c08
|
Jason Willwerscheid
|
2018-08-04
|
Expand here to see past versions of diff_pms-5.png:
Version
|
Author
|
Date
|
1d57c08
|
Jason Willwerscheid
|
2018-08-04
|
Expand here to see past versions of diff_pms-6.png:
Version
|
Author
|
Date
|
1d57c08
|
Jason Willwerscheid
|
2018-08-04
|