test montecarlo cluster stat distribution

function test_suite = test_montecarlo_cluster_stat_distribution
    % probability uniformity tests for cosmo_montecarlo_cluster_stat
    %
    % #   For CoSMoMVPA's copyright information and license terms,   #
    % #   see the COPYING file distributed with CoSMoMVPA.           #
    try % assignment of 'localfunctions' is necessary in Matlab >= 2016
        test_functions = localfunctions();
    catch % no problem; early Matlab versions can use initTestSuite fine
    end
    initTestSuite;

function test_mccs_uniformity_slow()
    show_progress = true;

    if show_progress
        cleaner = onCleanup(@()progress_helper('-clear'));
        progress_helper(sprintf('Slow test in %s ', ...
                                mfilename()));
    end

    % test for uniformity of p-values of monte_carlo_cluster_stat
    %
    % this test aims to verify that there is not an excessive number of
    % false positives under the null hypothesis. It does so by running
    % monte_carlo_cluster_stat several times on random data, storing the p
    % values for each iteration, and comparing those to a uniform
    % distribution.
    %
    % because running this test is slow, it can perform the same test
    % multiple times, each time increasing the number of iterations. This
    % is repeated until enough evidence is gathered that p values are
    % uniform or not.

    max_attempts = 8;
    grow_niter = 1.2;

    % benchmark values obtained by running
    % helper_mccs_get_correlation_with_uniform multiple times with
    % 'correct' monte carlo custer stat function.
    uniform_c_mu = .985;
    uniform_c_sd = .05;

    % the null hypothesis is that p values are uniformly distributed;
    % earlier versions of monte_carlo_cluster_stat would fail this test,
    % with a lower correlation value as a result. The correlation value
    % is converted to a z-score
    %
    pass_min_z = -1;
    fail_max_z = -5;

    min_pass_or_fail_count = 2;
    count_pass = 0;
    count_fail = 0;

    ps_cell = cell(1, max_attempts);

    niter = 20;
    for attempt = 1:max_attempts
        ps_cell{attempt} = helper_mccs_get_pvalues(niter, show_progress);
        ps = sort(cat(1, ps_cell{:}));

        % compute correlation with expected p-values
        n_ps = numel(ps);
        ps_uniform = (.5:n_ps)' / n_ps;
        c = cosmo_corr(ps, ps_uniform);

        z = sqrt(niter) * (c - uniform_c_mu) / uniform_c_sd;

        if z > pass_min_z
            count_pass = count_pass + 1;
            count_fail = 0;

        elseif z < fail_max_z
            count_fail = count_fail + 1;
            count_pass = 0;
        else
            count_fail = 0;
            count_pass = 0;
        end

        if count_pass >= min_pass_or_fail_count
            finalize_test_helper(show_progress);

            % test passes
            return

        elseif count_fail >= min_pass_or_fail_count
            % enough evidence that p values are non-uniform, fail
            finalize_test_helper(show_progress);

            error(['Found z=%d, indicating that probability values '...
                   'are probably not uniform'], z);
        end

        % not enough evidence for either uniform or non-uniform, redo the
        % test with more iterations
        niter = ceil(niter * grow_niter);
        progress_helper('#');

    end

    finalize_test_helper(show_progress);
    error('Maximum number of attempts reached');

function finalize_test_helper(show_progress)
    if show_progress
        fprintf('\n');
    end

function ps = helper_mccs_get_pvalues(niter, show_progress)
    % output: correlation between expected uniform distribution of p values
    % and those obtained from monte_carl_cluster_stat

    % This test requires statistics functions
    cosmo_skip_test_if_no_external('#stats');

    niter_tfce = 25;
    nsubj = 10;

    ps = zeros(niter, 1);

    % dataset with single features
    ds = struct();
    ds.samples = randn(nsubj, 1);
    ds.sa.targets = ones(nsubj, 1);
    ds.sa.chunks = (1:nsubj)';
    ds.fa = struct();
    ds.a = struct();

    % trivial (singleton) neighborhood
    nh = struct();
    nh.neighbors = {1};
    nh.fa = struct();
    nh.fa.sizes = 1;
    nh.a = struct();
    nh.origin.fa = ds.fa;
    nh.origin.a = ds.a;

    for iter = 1:niter
        opt = struct();
        opt.niter = niter_tfce;
        opt.progress = false;
        opt.h0_mean = 0;
        opt.dh = .1;

        ds.samples = randn(nsubj, 1);

        z = cosmo_montecarlo_cluster_stat(ds, nh, opt);

        ps(iter) = normcdf(z.samples);

        if show_progress
            progress_helper(':');
        end
    end

function progress_helper(what)
    % helper to show progress during the test, which then flushes at the
    % end
    persistent delete_count

    if isempty(delete_count)
        delete_count = 0;
    end

    if strcmp(what, '-clear')
        to_print = repmat(sprintf('\b'), 1, delete_count);
        delete_count = 0;
    else
        to_print = what;
        delete_count = delete_count + numel(what);
    end

    fprintf(to_print);