test tiedrank

function test_suite = test_tiedrank
    % tests for cosmo_tiedrank
    %
    % #   For CoSMoMVPA's copyright information and license terms,   #
    % #   see the COPYING file distributed with CoSMoMVPA.           #
    try % assignment of 'localfunctions' is necessary in Matlab >= 2016
        test_functions = localfunctions();
    catch % no problem; early Matlab versions can use initTestSuite fine
    end
    initTestSuite;

function test_exceptions()
    aet = @(varargin)assertExceptionThrown(@() ...
                                           cosmo_tiedrank(varargin{:}), '');
    % illegal first argument
    aet('foo');
    aet({1, 2});
    aet(false);

    % illegal second argument
    aet([1 2], 'foo');
    aet([1 2], .5);
    aet([1 2], -1);

function test_tiedrank_simple_input()
    args_results = {{rand()}, 1; ...
                    {[3 2 1]}, [1 1 1]; ...
                    {[3 2 1], 1}, [1 1 1]; ...
                    {[3 2 1], 2}, [3 2 1]; ...
                    {[5 NaN 3], 2}, [2 NaN 1] ...
                   };
    n = size(args_results, 1);
    for k = 1:n
        row = args_results(k, :);
        args = row{1};
        result = row{2};
        assertElementsAlmostEqual(cosmo_tiedrank(args{:}), result);
    end

function test_tiedrank_vector_input()
    wrapper_func = @(x)cosmo_tiedrank(x, 1);
    helper_test_tiedrank_vector_with(wrapper_func);

function test_tiedrank_ndim_input()
    for ndim = 2:5
        data_size = zeros(1, ndim);

        for dim = 1:ndim
            data_size(dim) = randint(3) + 1;
        end

        for dim = 1:ndim
            func = @(x)cosmo_tiedrank(x, dim);

            helper_test_tiedrank_ndim_with_size(func, data_size, dim);
        end
    end

function test_tiedrank_singleton_input()
    sizes = {[1 2 3], [1 1 3], [3 1 1], [1 3 3], [1 1 3 3], ...
             [1, 1 + randint(10)], [1 + randint(10), 1]};
    for k = 1:numel(sizes)
        data_size = sizes{k};
        ndim = numel(data_size);

        for dim = 1:ndim
            func = @(x)cosmo_tiedrank(x, dim);
            helper_test_tiedrank_ndim_with_size(func, data_size, dim);
        end
    end

function test_builtin_matlab()
    % sanity check that tests whether our test
    if cosmo_skip_test_if_no_external('!tiedrank')
        return
    end

    if ~cosmo_wtf('is_matlab')
        cosmo_notify_test_skipped('not running Matlab');
        return
    end

    helper_test_tiedrank_vector_with(@tiedrank);

function r = randint(n)
    r = ceil(rand() * n);

function helper_test_tiedrank_vector_with(func)
    dim = 1;
    nsamples = randint(100) + 100;
    data_size = [1, 1];
    data_size(dim) = nsamples;
    helper_test_tiedrank_ndim_with_size(func, data_size, dim);

function helper_test_tiedrank_ndim_with_size(func, data_size, dim)
    data = generate_random_tied_data(data_size);
    expected_result = func(data);

    assert_result_matches(data, dim, expected_result);

function data = generate_random_tied_data(data_size)
    nan_ratio = .1;
    tied_ratio_min = .5;
    tied_ratio_max = .6;

    data = rand(data_size);
    n = numel(data);

    tied_ratio = tied_ratio_min + rand() * (tied_ratio_max - tied_ratio_min);
    tied_c = 0;

    while tied_c * n < tied_ratio
        p = randint(n);
        q = randint(n);
        data(p) = data(q);
        tied_c = tied_c + 1;
    end

    nan_msk = rand(data_size) < nan_ratio;
    nan_msk(randint(n)) = true; % at least one NaN
    data(nan_msk) = NaN;

function result = builtin_matlab_tiedrank_wrapper(data)
    if cosmo_skip_test_if_no_external('!tiedrank') || ...
                ~cosmo_wtf('is_matlab')
        return
    end

    result = tiedrank(data);

function assert_result_matches(data, dim, result)
    % data and result are both N-dimensional arrays and must be of the same
    % size. result should be equal to the tiedrank output from data
    assertEqual(size(data), size(result));

    nan_msk = isnan(data);
    assertEqual(isnan(result), nan_msk);

    if dim > numel(size(data))
        % all non-NaN values must be 1
        assertTrue(all(result(~nan_msk) == 1));
        return
    end

    % make the dimension along which tiedrank is applied the first
    % dimension in the *_sh variables
    shift_count = dim - 1;
    data_sh = shiftdim(data, shift_count);
    result_sh = shiftdim(result, shift_count);

    % reshape in matrix form so that first dimension is the output for
    % each feature and the second dimension represents all other features
    nsamples = size(data_sh, 1);
    nfeatures = numel(data) / nsamples;
    data_mat = reshape(data_sh, nsamples, nfeatures);
    result_mat = reshape(result_sh, nsamples, nfeatures);

    for k = 1:nfeatures
        % test k-th feature
        d = data_mat(:, k);
        r = result_mat(:, k);

        % only consider non-nan values
        dkeep = d(~isnan(d));
        rkeep = r(~isnan(r));

        % sort the values
        [s, idx] = sort(dkeep, 1);
        nkeep = numel(idx);

        % allocate space for expected output for k-th feature
        expected_rkeep = zeros(nkeep, 1);
        m = 0;
        while m < nkeep
            m = m + 1;

            % index of first value in a possible row of equal values
            first = m;
            c = 0;

            while m < nkeep && s(m) == s(m + 1)
                c = c + 1;
                m = m + 1;
                if c > numel(d)
                    error('no convergence');
                end
            end

            % the number of equal values starting at first is equal to c+1
            expected_rkeep(idx(first + (0:c))) = c / 2 + first;
        end

        if isempty(expected_rkeep)
            assert(isempty(rkeep));
        else
            assertElementsAlmostEqual(expected_rkeep, rkeep);
        end
    end