1 Summary

In this set of simulations, we consider settings with varying number of hypothesis tests. Since many newer FDR controlling approaches require fitting some model to the independent covariates, they may be sensitive to lower numbers of tests. Both informative and uninformative covariates are included in these settings as described in simulations-informative-sine.Rmd.

2 Workspace Setup

library(dplyr)
library(ggplot2)
library(SummarizedBenchmark)
library(parallel)

## load helper functions
for (f in list.files("../R", "\\.(r|R)$", full.names = TRUE)) {
    source(f)
}

## project data/results folders
resdir <- "results"
dir.create(resdir, showWarnings = FALSE, recursive = TRUE)

## intermediary files we create below
n100_file <- file.path(resdir, "varyingntests-benchmark-n100.rds")
n500_file <- file.path(resdir, "varyingntests-benchmark-n500.rds")
n1000_file <- file.path(resdir, "varyingntests-benchmark-n1000.rds")
n5000_file <- file.path(resdir, "varyingntests-benchmark-n5000.rds")
n10000_file <- file.path(resdir, "varyingntests-benchmark-n10000.rds")
n50000_file <- file.path(resdir, "varyingntests-benchmark-n50000.rds")

## number of cores for parallelization
cores <- 20
B <- 100

## define bechmarking design
bd <- initializeBenchDesign()

As described in simulations-null.Rmd, we include Scott’s FDR Regression in the analysis for simulations with Gaussian test statistics. Again, we include both nulltype = "empirical" and nulltype = "theoretical". Since all settings in this series of simulations use test statistics simulated with Gaussian test statistics, we include Scott’s FDR Regression in all of the comparisons.

bdplus <- bd
bdplus <- addBMethod(bdplus, "fdrreg-t",
                     FDRreg::FDRreg,
                     function(x) { x$FDR },
                     z = test_statistic,
                     features = model.matrix( ~  splines::bs(ind_covariate, df = 3) - 1),
                     nulltype = 'theoretical',
                     control = list(lambda = 0.01))
bdplus <- addBMethod(bdplus, "fdrreg-e",
                     FDRreg::FDRreg,
                     function(x) { x$FDR },
                     z = test_statistic,
                     features = model.matrix( ~  splines::bs(ind_covariate, df = 3) - 1),
                     nulltype = 'empirical',
                     control = list(lambda = 0.01))

All simulation settings will share the following parameters.

es_dist <- rnorm_generator(3)       # functional: dist of alternative test stats
ts_dist <- rnorm_perturber(1)  # functional: sampling dist/noise for test stats
null_dist <- rnorm_2pvaluer(1)    # functional: dist to calc p-values
icovariate <- runif               # functional: independent covariate
pi0 <- pi0_sine(0.90)             # numeric: proportion of null hypotheses

Simulation results will be presented excluding a subset of methods, and for certain plots (upset plots), a single alpha cutoff will be used.

excludeSet <- c("unadjusted", "bl-df02", "bl-df04", "bl-df05")
ualpha <- 0.05

3 100 Tests

First, we consider the setting where 100 hypotheses are tested.

3.1 Data Simulation

m <- 100                        # integer: number of hypothesis tests
seed <- 1010

We next run the simulations.

if (file.exists(n100_file)) {
    res <- readRDS(n100_file)
} else {
    res <- mclapply(X = 1:B, FUN = simIteration, bench = bdplus, m = m,
                    pi0 = pi0, es_dist = es_dist, icovariate = icovariate,
                    ts_dist = ts_dist, null_dist = null_dist,
                    seed = seed, mc.cores = cores)
    saveRDS(res, file = n100_file)
}
res_i <- lapply(res, `[[`, "informative")
res_u <- lapply(res, `[[`, "uninformative")

3.2 Covariate Diagnostics

Here, we show the relationship between the independent covariate and p-values for a single replication of the experiment.

onerun <- simIteration(bdplus, m = m, pi0 = pi0, es_dist = es_dist, ts_dist = ts_dist,
                       icovariate = icovariate, null_dist = null_dist, execute = FALSE)

rank_scatter(onerun, pvalue = "pval", covariate = "ind_covariate")

strat_hist(onerun, pvalue = "pval", covariate = "ind_covariate", maxy = 10, numQ = 3)

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:ggplot2':
## 
##     ggsave

3.3 Benchmark Metrics

We plot the averaged results across 100 replications.

resdf <- plotsim_standardize(res_i, alpha = seq(0.01, 0.10, 0.01))

plotsim_average(resdf, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

We also take a look at the distribution of rejects for each method as a function of the effect size and independent covariate.

covariateLinePlot(res_i, alpha = ualpha, covname = "effect_size")

## Warning: Removed 50 rows containing missing values (geom_path).

## Warning: Removed 50 rows containing missing values (geom_errorbar).

covariateLinePlot(res_i, alpha = ualpha, covname = "ind_covariate")

## Warning: Removed 50 rows containing missing values (geom_path).

## Warning: Removed 50 rows containing missing values (geom_errorbar).

Finally, (if enough methods produce rejections at 0.05) we take a look at the overlap of rejections between methods.

if (numberMethodsReject(resdf, alphacutoff = ualpha, filterSet = excludeSet) >= 3) {
    aggupset(res_i, alpha = ualpha, supplementary = FALSE, return_list = FALSE)
} else {
    message("Not enough methods found rejections at alpha ", ualpha, 
            "; skipping upset plot")
}

We also compare the simulation results with and without an informative covariate.

resdfu <- plotsim_standardize(res_u, alpha = seq(0.01, 0.10, 0.01))

resdfiu <- dplyr::full_join(select(resdf, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            select(resdfu, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            by = c("rep", "blabel", "param.alpha", "key",
                                   "performanceMetric", "alpha"),
                            suffix = c(".info", ".uninfo"))
resdfiu <- dplyr::mutate(resdfiu, value = value.info - value.uninfo)

plotsim_average(resdfiu, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

4 500 Tests

Next, we consider the setting where 500 hypotheses are tested.

4.1 Data Simulation

m <- 500                        # integer: number of hypothesis tests
seed <- 5050

We next run the simulations.

if (file.exists(n500_file)) {
    res <- readRDS(n500_file)
} else {
    res <- mclapply(X = 1:B, FUN = simIteration, bench = bdplus, m = m,
                    pi0 = pi0, es_dist = es_dist, icovariate = icovariate,
                    ts_dist = ts_dist, null_dist = null_dist,
                    seed = seed, mc.cores = cores)
    saveRDS(res, file = n500_file)
}
res_i <- lapply(res, `[[`, "informative")
res_u <- lapply(res, `[[`, "uninformative")

4.2 Covariate Diagnostics

Here, we show the relationship between the independent covariate and p-values for a single replication of the experiment.

onerun <- simIteration(bdplus, m = m, pi0 = pi0, es_dist = es_dist, ts_dist = ts_dist,
                       icovariate = icovariate, null_dist = null_dist, execute = FALSE)

rank_scatter(onerun, pvalue = "pval", covariate = "ind_covariate")

strat_hist(onerun, pvalue = "pval", covariate = "ind_covariate", maxy = 10, numQ = 3)

4.3 Benchmark Metrics

We plot the averaged results across 100 replications.

resdf <- plotsim_standardize(res_i, alpha = seq(0.01, 0.10, 0.01))

plotsim_average(resdf, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

We also take a look at the distribution of rejects for each method as a function of the effect size and independent covariate.

covariateLinePlot(res_i, alpha = ualpha, covname = "effect_size")

## Warning: Removed 50 rows containing missing values (geom_path).

## Warning: Removed 50 rows containing missing values (geom_errorbar).

covariateLinePlot(res_i, alpha = ualpha, covname = "ind_covariate")

## Warning: Removed 50 rows containing missing values (geom_path).

## Warning: Removed 50 rows containing missing values (geom_errorbar).

Finally, (if enough methods produce rejections at 0.05) we take a look at the overlap of rejections between methods.

if (numberMethodsReject(resdf, alphacutoff = ualpha, filterSet = excludeSet) >= 3) {
    aggupset(res_i, alpha = ualpha, supplementary = FALSE, return_list = FALSE)
} else {
    message("Not enough methods found rejections at alpha ", ualpha, 
            "; skipping upset plot")
}

We also compare the simulation results with and without an informative covariate.

resdfu <- plotsim_standardize(res_u, alpha = seq(0.01, 0.10, 0.01))

resdfiu <- dplyr::full_join(select(resdf, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            select(resdfu, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            by = c("rep", "blabel", "param.alpha", "key",
                                   "performanceMetric", "alpha"),
                            suffix = c(".info", ".uninfo"))
resdfiu <- dplyr::mutate(resdfiu, value = value.info - value.uninfo)

plotsim_average(resdfiu, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

5 1000 Tests

Next, we consider the setting where 1000 hypotheses are tested.

5.1 Data Simulation

m <- 1000                        # integer: number of hypothesis tests
seed <- 10001

We next run the simulations.

if (file.exists(n1000_file)) {
    res <- readRDS(n1000_file)
} else {
    res <- mclapply(X = 1:B, FUN = simIteration, bench = bdplus, m = m,
                    pi0 = pi0, es_dist = es_dist, icovariate = icovariate,
                    ts_dist = ts_dist, null_dist = null_dist,
                    seed = seed, mc.cores = cores)
    saveRDS(res, file = n1000_file)
}
res_i <- lapply(res, `[[`, "informative")
res_u <- lapply(res, `[[`, "uninformative")

5.2 Covariate Diagnostics

Here, we show the relationship between the independent covariate and p-values for a single replication of the experiment.

onerun <- simIteration(bdplus, m = m, pi0 = pi0, es_dist = es_dist, ts_dist = ts_dist,
                       icovariate = icovariate, null_dist = null_dist, execute = FALSE)

rank_scatter(onerun, pvalue = "pval", covariate = "ind_covariate")

strat_hist(onerun, pvalue = "pval", covariate = "ind_covariate", maxy = 10, numQ = 3)

5.3 Benchmark Metrics

We plot the averaged results across 100 replications.

resdf <- plotsim_standardize(res_i, alpha = seq(0.01, 0.10, 0.01))

plotsim_average(resdf, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

We also take a look at the distribution of rejects for each method as a function of the effect size and independent covariate.

covariateLinePlot(res_i, alpha = ualpha, covname = "effect_size")

covariateLinePlot(res_i, alpha = ualpha, covname = "ind_covariate")

Finally, (if enough methods produce rejections at 0.05) we take a look at the overlap of rejections between methods.

if (numberMethodsReject(resdf, alphacutoff = ualpha, filterSet = excludeSet) >= 3) {
    aggupset(res_i, alpha = ualpha, supplementary = FALSE, return_list = FALSE)
} else {
    message("Not enough methods found rejections at alpha ", ualpha, 
            "; skipping upset plot")
}

We also compare the simulation results with and without an informative covariate.

resdfu <- plotsim_standardize(res_u, alpha = seq(0.01, 0.10, 0.01))

resdfiu <- dplyr::full_join(select(resdf, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            select(resdfu, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            by = c("rep", "blabel", "param.alpha", "key",
                                   "performanceMetric", "alpha"),
                            suffix = c(".info", ".uninfo"))
resdfiu <- dplyr::mutate(resdfiu, value = value.info - value.uninfo)

plotsim_average(resdfiu, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

6 5000 Tests

Next, we consider the setting where 5000 hypotheses are tested.

6.1 Data Simulation

m <- 5000                        # integer: number of hypothesis tests
seed <- 50005

We next run the simulations.

if (file.exists(n5000_file)) {
    res <- readRDS(n5000_file)
} else {
    res <- mclapply(X = 1:B, FUN = simIteration, bench = bdplus, m = m,
                    pi0 = pi0, es_dist = es_dist, icovariate = icovariate,
                    ts_dist = ts_dist, null_dist = null_dist,
                    seed = seed, mc.cores = cores)
    saveRDS(res, file = n5000_file)
}
res_i <- lapply(res, `[[`, "informative")
res_u <- lapply(res, `[[`, "uninformative")

6.2 Covariate Diagnostics

Here, we show the relationship between the independent covariate and p-values for a single replication of the experiment.

onerun <- simIteration(bdplus, m = m, pi0 = pi0, es_dist = es_dist, ts_dist = ts_dist,
                       icovariate = icovariate, null_dist = null_dist, execute = FALSE)

rank_scatter(onerun, pvalue = "pval", covariate = "ind_covariate")

strat_hist(onerun, pvalue = "pval", covariate = "ind_covariate", maxy = 10, numQ = 3)

6.3 Benchmark Metrics

We plot the averaged results across 100 replications.

resdf <- plotsim_standardize(res_i, alpha = seq(0.01, 0.10, 0.01))

plotsim_average(resdf, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

We also take a look at the distribution of rejects for each method as a function of the effect size and independent covariate.

covariateLinePlot(res_i, alpha = ualpha, covname = "effect_size")

covariateLinePlot(res_i, alpha = ualpha, covname = "ind_covariate")

Finally, (if enough methods produce rejections at 0.05) we take a look at the overlap of rejections between methods.

if (numberMethodsReject(resdf, alphacutoff = ualpha, filterSet = excludeSet) >= 3) {
    aggupset(res_i, alpha = ualpha, supplementary = FALSE, return_list = FALSE)
} else {
    message("Not enough methods found rejections at alpha ", ualpha, 
            "; skipping upset plot")
}

We also compare the simulation results with and without an informative covariate.

resdfu <- plotsim_standardize(res_u, alpha = seq(0.01, 0.10, 0.01))

resdfiu <- dplyr::full_join(select(resdf, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            select(resdfu, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            by = c("rep", "blabel", "param.alpha", "key",
                                   "performanceMetric", "alpha"),
                            suffix = c(".info", ".uninfo"))
resdfiu <- dplyr::mutate(resdfiu, value = value.info - value.uninfo)

plotsim_average(resdfiu, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

7 10000 Tests

Next, we consider the setting where 10000 hypotheses are tested.

7.1 Data Simulation

m <- 10000                        # integer: number of hypothesis tests
seed <- 1515

We next run the simulations.

if (file.exists(n10000_file)) {
    res <- readRDS(n10000_file)
} else {
    res <- mclapply(X = 1:B, FUN = simIteration, bench = bdplus, m = m,
                    pi0 = pi0, es_dist = es_dist, icovariate = icovariate,
                    ts_dist = ts_dist, null_dist = null_dist,
                    seed = seed, mc.cores = cores)
    saveRDS(res, file = n10000_file)
}
res_i <- lapply(res, `[[`, "informative")
res_u <- lapply(res, `[[`, "uninformative")

7.2 Covariate Diagnostics

Here, we show the relationship between the independent covariate and p-values for a single replication of the experiment.

onerun <- simIteration(bdplus, m = m, pi0 = pi0, es_dist = es_dist, ts_dist = ts_dist,
                       icovariate = icovariate, null_dist = null_dist, execute = FALSE)

rank_scatter(onerun, pvalue = "pval", covariate = "ind_covariate")

strat_hist(onerun, pvalue = "pval", covariate = "ind_covariate", maxy = 10, numQ = 3)

7.3 Benchmark Metrics

We plot the averaged results across 100 replications.

resdf <- plotsim_standardize(res_i, alpha = seq(0.01, 0.10, 0.01))

plotsim_average(resdf, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

We also take a look at the distribution of rejects for each method as a function of the effect size and independent covariate.

covariateLinePlot(res_i, alpha = ualpha, covname = "effect_size")

covariateLinePlot(res_i, alpha = ualpha, covname = "ind_covariate")

Finally, (if enough methods produce rejections at 0.05) we take a look at the overlap of rejections between methods.

if (numberMethodsReject(resdf, alphacutoff = ualpha, filterSet = excludeSet) >= 3) {
    aggupset(res_i, alpha = ualpha, supplementary = FALSE, return_list = FALSE)
} else {
    message("Not enough methods found rejections at alpha ", ualpha, 
            "; skipping upset plot")
}

We also compare the simulation results with and without an informative covariate.

resdfu <- plotsim_standardize(res_u, alpha = seq(0.01, 0.10, 0.01))

resdfiu <- dplyr::full_join(select(resdf, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            select(resdfu, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            by = c("rep", "blabel", "param.alpha", "key",
                                   "performanceMetric", "alpha"),
                            suffix = c(".info", ".uninfo"))
resdfiu <- dplyr::mutate(resdfiu, value = value.info - value.uninfo)

plotsim_average(resdfiu, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

8 50000 Tests

Next, we consider the setting where 50000 hypotheses are tested.

8.1 Data Simulation

m <- 50000                        # integer: number of hypothesis tests
seed <- 5555

We next run the simulations.

if (file.exists(n50000_file)) {
    res <- readRDS(n50000_file)
} else {
    res <- mclapply(X = 1:B, FUN = simIteration, bench = bdplus, m = m,
                    pi0 = pi0, es_dist = es_dist, icovariate = icovariate,
                    ts_dist = ts_dist, null_dist = null_dist,
                    seed = seed, mc.cores = cores)
    saveRDS(res, file = n50000_file)
}
res_i <- lapply(res, `[[`, "informative")
res_u <- lapply(res, `[[`, "uninformative")

8.2 Covariate Diagnostics

Here, we show the relationship between the independent covariate and p-values for a single replication of the experiment.

onerun <- simIteration(bdplus, m = m, pi0 = pi0, es_dist = es_dist, ts_dist = ts_dist,
                       icovariate = icovariate, null_dist = null_dist, execute = FALSE)

rank_scatter(onerun, pvalue = "pval", covariate = "ind_covariate")

## Warning: Removed 2 rows containing non-finite values (stat_binhex).

strat_hist(onerun, pvalue = "pval", covariate = "ind_covariate", maxy = 10, numQ = 3)

8.3 Benchmark Metrics

We plot the averaged results across 100 replications.

resdf <- plotsim_standardize(res_i, alpha = seq(0.01, 0.10, 0.01))

plotsim_average(resdf, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

plotsim_average(resdf, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE)

We also take a look at the distribution of rejects for each method as a function of the effect size and independent covariate.

covariateLinePlot(res_i, alpha = ualpha, covname = "effect_size")

covariateLinePlot(res_i, alpha = ualpha, covname = "ind_covariate")

Finally, (if enough methods produce rejections at 0.05) we take a look at the overlap of rejections between methods.

if (numberMethodsReject(resdf, alphacutoff = ualpha, filterSet = excludeSet) >= 3) {
    aggupset(res_i, alpha = ualpha, supplementary = FALSE, return_list = FALSE)
} else {
    message("Not enough methods found rejections at alpha ", ualpha, 
            "; skipping upset plot")
}

We also compare the simulation results with and without an informative covariate.

resdfu <- plotsim_standardize(res_u, alpha = seq(0.01, 0.10, 0.01))

resdfiu <- dplyr::full_join(select(resdf, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            select(resdfu, rep, blabel, param.alpha, key,
                                   performanceMetric, alpha, value),
                            by = c("rep", "blabel", "param.alpha", "key",
                                   "performanceMetric", "alpha"),
                            suffix = c(".info", ".uninfo"))
resdfiu <- dplyr::mutate(resdfiu, value = value.info - value.uninfo)

plotsim_average(resdfiu, met="rejections", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="FDR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TPR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

plotsim_average(resdfiu, met="TNR", filter_set = excludeSet,
                merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)

9 Session Info

sessionInfo()

## R version 3.5.0 (2018-04-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS: /usr/lib64/libblas.so.3.4.2
## LAPACK: /usr/lib64/liblapack.so.3.4.2
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] splines   parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2              cowplot_0.9.3              
##  [3] hexbin_1.27.2               adaptMT_1.0.0              
##  [5] FDRreg_0.1                  fda_2.4.8                  
##  [7] Matrix_1.2-14               fdrtool_1.2.15             
##  [9] swfdr_1.6.0                 qvalue_2.12.0              
## [11] ashr_2.2-7                  IHW_1.8.0                  
## [13] magrittr_1.5                SummarizedBenchmark_0.99.2 
## [15] mclust_5.4.1                stringr_1.3.1              
## [17] rlang_0.2.2                 UpSetR_1.3.3               
## [19] SummarizedExperiment_1.10.1 DelayedArray_0.6.6         
## [21] BiocParallel_1.14.2         matrixStats_0.54.0         
## [23] Biobase_2.40.0              GenomicRanges_1.32.7       
## [25] GenomeInfoDb_1.16.0         IRanges_2.14.12            
## [27] S4Vectors_0.18.3            BiocGenerics_0.26.0        
## [29] tidyr_0.8.1                 ggplot2_3.0.0              
## [31] dplyr_0.7.7                
## 
## loaded via a namespace (and not attached):
##  [1] ggdendro_0.1-20        foreach_1.4.4          assertthat_0.2.0      
##  [4] ggstance_0.3.1         GenomeInfoDbData_1.1.0 ggrepel_0.8.0         
##  [7] mosaic_1.4.0           yaml_2.2.0             slam_0.1-43           
## [10] pillar_1.3.0           backports_1.1.2        lattice_0.20-35       
## [13] glue_1.3.0             digest_0.6.18          XVector_0.20.0        
## [16] colorspace_1.3-2       htmltools_0.3.6        plyr_1.8.4            
## [19] pkgconfig_2.0.2        broom_0.5.0            zlibbioc_1.26.0       
## [22] purrr_0.2.5            scales_1.0.0           mosaicData_0.17.0     
## [25] tibble_1.4.2           withr_2.1.2            lazyeval_0.2.1        
## [28] crayon_1.3.4           evaluate_0.12          nlme_3.1-137          
## [31] doParallel_1.0.14      MASS_7.3-51            truncnorm_1.0-8       
## [34] tools_3.5.0            munsell_0.5.0          compiler_3.5.0        
## [37] grid_3.5.0             RCurl_1.95-4.11        iterators_1.0.10      
## [40] labeling_0.3           mosaicCore_0.6.0       bitops_1.0-6          
## [43] rmarkdown_1.10         gtable_0.2.0           codetools_0.2-15      
## [46] reshape2_1.4.3         R6_2.3.0               gridExtra_2.3         
## [49] knitr_1.20             ggformula_0.9.0        bindr_0.1.1           
## [52] rprojroot_1.3-2        lpsymphony_1.8.0       stringi_1.2.4         
## [55] pscl_1.5.2             SQUAREM_2017.10-1      Rcpp_0.12.19          
## [58] tidyselect_0.2.5

Simulation Study: Varying Number of Tests

Patrick Kimes

October 30, 2018

1 Summary

2 Workspace Setup

3 100 Tests

3.1 Data Simulation

3.2 Covariate Diagnostics

3.3 Benchmark Metrics

4 500 Tests

4.1 Data Simulation

4.2 Covariate Diagnostics

4.3 Benchmark Metrics

5 1000 Tests

5.1 Data Simulation

5.2 Covariate Diagnostics

5.3 Benchmark Metrics

6 5000 Tests

6.1 Data Simulation

6.2 Covariate Diagnostics

6.3 Benchmark Metrics

7 10000 Tests

7.1 Data Simulation

7.2 Covariate Diagnostics

7.3 Benchmark Metrics

8 50000 Tests

8.1 Data Simulation

8.2 Covariate Diagnostics

8.3 Benchmark Metrics

9 Session Info