test map pca

function test_suite = test_map_pca
    % tests for cosmo_map_pca
    %
    % #   For CoSMoMVPA's copyright information and license terms,   #
    % #   see the COPYING file distributed with CoSMoMVPA.           #
    try % assignment of 'localfunctions' is necessary in Matlab >= 2016
        test_functions = localfunctions();
    catch % no problem; early Matlab versions can use initTestSuite fine
    end
    initTestSuite;

function test_map_pca_exceptions
    aet = @(varargin)assertExceptionThrown(@() ...
                                           cosmo_map_pca(varargin{:}), '');

    ds = cosmo_synthetic_dataset();
    pca_params = struct();
    pca_params.mu = [0.0864 0.9248 -0.2001 1.2522 1.0349 0.0568];
    pca_params.coef = [0.0014    0.7569    0.3369   -0.0540    0.3234; ...
                       0.4052   -0.5164    0.4105   -0.3355   -0.0290; ...
                       0.3133    0.0275    0.6618    0.0901    0.1745; ...
                       -0.4294   -0.2190   -0.0420   -0.5306    0.6797; ...
                       0.7090    0.0436   -0.5070    0.0744    0.4815; ...
                       -0.2251   -0.3313    0.1456    0.7677    0.4127];
    pca_params.retain = true(1, 5);

    % test mutually exclusive parameters
    aet(ds, 'pca_params', pca_params, 'pca_explained_count', 3);
    aet(ds, 'pca_explained_ratio', .5, 'pca_explained_count', 3);
    aet(ds, 'pca_explained_ratio', .5, 'pca_params', pca_params);

    % wrong size for coef
    bad_pca_params = pca_params;
    bad_pca_params.coef = 1;
    aet(ds, 'pca_params', bad_pca_params);

    % missing field
    bad_pca_params = pca_params;
    bad_pca_params = rmfield(bad_pca_params, 'mu');
    aet(ds, 'pca_params', bad_pca_params);

    % bad explained_count
    aet(ds, 'pca_explained_count', -1);
    aet(ds, 'pca_explained_count', 1.5);
    aet(ds, 'pca_explained_count', 7);

    % bad explained ratio
    aet(ds, 'pca_explained_ratio', -1);
    aet(ds, 'pca_explained_ratio', -.001);
    aet(ds, 'pca_explained_ratio', 1.001);

function test_map_pca_max_feature_count_exceptions()
    default_max_feature_count = 1000;

    nsamples = 20;
    for max_feature_count = [NaN, ...
                             round(rand() * 10 + 10), ...
                             default_max_feature_count]
        for delta = -1:1
            opt = struct();

            if isnan(max_feature_count)
                nfeatures = default_max_feature_count + delta;
            else
                nfeatures = max_feature_count + delta;
                opt.max_feature_count = max_feature_count;
            end

            data = randn(nsamples, nfeatures);

            expect_exception = delta > 0;

            for use_struct = [false, true]
                if use_struct
                    ds = struct();
                    ds.samples = data;
                else
                    ds = data;
                end

                func_handle = @()cosmo_map_pca(ds, opt);

                if expect_exception
                    assertExceptionThrown(func_handle);
                else
                    % should not raise an exception
                    func_handle();
                end
            end
        end
    end

function test_map_pca_basics
    nfeatures = ceil(rand() * 10 + 10);
    nsamples_train = ceil(rand() * 10 + 10) + nfeatures;
    nsamples_test = ceil(rand() * 10 + 10) + nfeatures;

    train_samples = randn(nsamples_train, nfeatures);
    test_samples = randn(nsamples_test, nfeatures);

    for count = [1 ceil(nfeatures / 2) nfeatures ceil(rand() * nfeatures)]
        helper_test_map_pca_with_count(train_samples, ...
                                       test_samples, count);
    end

    for ratio = [.1 .5 1 rand()]
        helper_test_map_pca_with_ratio(train_samples, ...
                                       test_samples, ratio);
    end

    helper_test_map_pca_with_no_arguments(train_samples, ...
                                          test_samples);

function helper_test_map_pca_with_ratio(train_samples, ...
                                        test_samples, ratio)
    ratio_opt = struct();
    ratio_opt.pca_explained_ratio = ratio;

    % check that ratio and count give the same output
    [unused, param] = cosmo_pca(train_samples);
    count = find(cumsum(param.explained) >= ratio * 100, 1);
    if isempty(count)
        count = numel(param.explained);
    end

    count_opt = struct();
    count_opt.pca_explained_count = count;

    did_throw = true;
    try
        [samples_ratio, params_ratio] = cosmo_map_pca(train_samples, ratio_opt);
        did_throw = false;
    end

    if did_throw
        cosmo_map_pca(train_samples, count_opt);
        assertExceptionThrown(@() ...
                              cosmo_map_pca(train_samples, count_opt));
        assertExceptionThrown(@() ...
                              helper_test_dataset_samples_correspondence(train_samples, ...
                                                                         ratio_opt));

    else
        [samples_count, params_count] = cosmo_map_pca(train_samples, ...
                                                      count_opt);

        assert_almost_equal(samples_ratio, samples_count);
        assert_almost_equal(params_ratio, params_count);

        % verify that raw samples and dataset structures give the same
        % result
        helper_test_dataset_samples_correspondence(train_samples, ...
                                                   ratio_opt);
        helper_test_dataset_samples_correspondence(test_samples, ...
                                                   'pca_params', params_ratio);
    end

    % verify correct result by delegating to count
    if ~did_throw
        helper_test_map_pca_with_count(train_samples, test_samples, count);
    else
        assertExceptionThrown(@() ...
                              helper_test_map_pca_with_count(train_samples, test_samples, ...
                                                             count));
    end

function helper_test_map_pca_with_count(train_samples, test_samples, count)
    assert(numel(count) == 1);
    opt = struct();
    opt.pca_explained_count = count;

    % do PCA in two ways; we assume that cosmo_pca works correctly, and
    % compare its result to cosmo_map_pca
    [ys, params] = cosmo_map_pca(train_samples, opt);
    [pca_samples, pca_params] = cosmo_pca(train_samples, count);

    assert_almost_equal(ys, pca_samples);

    % verify when using samples
    helper_test_dataset_samples_correspondence(train_samples, ...
                                               'pca_explained_count', count);

    % verify params
    expected_params = struct();
    expected_params.mu = pca_params.mu;
    expected_params.coef = pca_params.coef;
    nfeatures = size(train_samples, 2);
    expected_params.retain = (1:nfeatures) <= count;
    assert_almost_equal(expected_params, params);

    [zs, params2] = cosmo_map_pca(test_samples, 'pca_params', params);
    assertEqual(params2, params);

    expected_samples = bsxfun(@minus, test_samples, params.mu) * params.coef;
    assert_almost_equal(zs, expected_samples);

    helper_test_dataset_samples_correspondence(test_samples, ...
                                               'pca_params', params);

function helper_test_map_pca_with_no_arguments(train_samples, test_samples)
    [ys, params] = cosmo_map_pca(train_samples);

    count = size(train_samples, 2);
    [ys_count, params_count] = cosmo_map_pca(train_samples, ...
                                             'pca_explained_count', count);
    assert_almost_equal(ys, ys_count);
    assert_almost_equal(params, params_count);

    helper_test_dataset_samples_correspondence(train_samples);
    helper_test_dataset_samples_correspondence(test_samples, ...
                                               'pca_params', params);

function helper_test_dataset_samples_correspondence(samples, varargin)
    ds = struct();
    ds.samples = samples;

    [result_ds, param_ds] = cosmo_map_pca(ds, varargin{:});
    [result, param] = cosmo_map_pca(samples, varargin{:});

    % verify that output is the same
    expected_ds = struct();
    expected_ds.samples = result;

    expected_ds.fa.comp = 1:size(result, 2);
    expected_ds.a.fdim.labels = {'comp'};
    expected_ds.a.fdim.values = {1:size(samples, 2)};

    assert_almost_equal(expected_ds, result_ds);

    % parameters must be identical
    assert_almost_equal(param_ds, param);

function assert_almost_equal(x, y, msg, tol)
    % helper that supports structs recursively
    if nargin < 4
        tol = 1e-5;
    end
    if nargin < 3
        msg = '';
    end

    if isstruct(x)
        assertTrue(isstruct(y), [msg ' - x is a struct but y is not']);

        fns = fieldnames(x);
        assertEqual(sort(fns), sort(fieldnames(y)), ...
                    [msg ' - fieldname mismatch']);

        n = numel(fns);
        for k = 1:n
            fn = fns{k};

            assert_almost_equal(x.(fn), y.(fn), ...
                                sprintf('%s - mismatch for field %s', msg, fn));
        end

    elseif iscell(x) && iscell(y)
        assertEqual(size(x), size(y), [msg ' - cell size mismatch']);
        for k = 1:numel(x)
            assert_almost_equal(x{k}, y{k}, ...
                                sprintf(' - cell element %d different', k));
        end
    elseif ischar(x) && ischar(y)
        assertEqual(x, y, sprintf('%s - %s ~= %s', msg, x, y));
    elseif islogical(x) && islogical(y)
        assertEqual(x, y, sprintf('%s - boolean array mismatch', msg));
    elseif isnumeric(x) && isnumeric(y)
        if isempty(x) && isempty(y)
            return
        end
        assertElementsAlmostEqual(x, y, 'absolute', tol, [msg ' - different']);
    else
        error('unsupported data type');
    end

function test_pca_map_ratio_unity()
    sizes = [4, 10; ...
             10, 4; ...
             4, 4];

    for k = size(sizes, 1)
        sz = sizes(k, :);
        samples = randn(sz);
        [xs, params] = cosmo_map_pca(samples, 'pca_explained_ratio', 1);

        assertEqual(params.retain, true(1, sz(end) - 1));
    end