test map pca

function test_suite=test_map_pca
% tests for cosmo_map_pca
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #
    try % assignment of 'localfunctions' is necessary in Matlab >= 2016
        test_functions=localfunctions();
    catch % no problem; early Matlab versions can use initTestSuite fine
    end
    initTestSuite;

function test_map_pca_exceptions
    aet=@(varargin)assertExceptionThrown(@()...
                        cosmo_map_pca(varargin{:}),'');

    ds=cosmo_synthetic_dataset();
    pca_params=struct();
    pca_params.mu=[0.0864 0.9248 -0.2001 1.2522 1.0349 0.0568];
    pca_params.coef=[ 0.0014    0.7569    0.3369   -0.0540    0.3234;...
                      0.4052   -0.5164    0.4105   -0.3355   -0.0290;...
                      0.3133    0.0275    0.6618    0.0901    0.1745;...
                     -0.4294   -0.2190   -0.0420   -0.5306    0.6797;...
                      0.7090    0.0436   -0.5070    0.0744    0.4815;...
                     -0.2251   -0.3313    0.1456    0.7677    0.4127];
    pca_params.retain=true(1,5);

    % test mutually exclusive parameters
    aet(ds,'pca_params',pca_params,'pca_explained_count',3);
    aet(ds,'pca_explained_ratio',.5,'pca_explained_count',3);
    aet(ds,'pca_explained_ratio',.5,'pca_params',pca_params);

    % wrong size for coef
    bad_pca_params=pca_params;
    bad_pca_params.coef=1;
    aet(ds,'pca_params',bad_pca_params);

    % missing field
    bad_pca_params=pca_params;
    bad_pca_params=rmfield(bad_pca_params,'mu');
    aet(ds,'pca_params',bad_pca_params);

    % bad explained_count
    aet(ds,'pca_explained_count',-1);
    aet(ds,'pca_explained_count',1.5);
    aet(ds,'pca_explained_count',7);

    % bad explained ratio
    aet(ds,'pca_explained_ratio',-1);
    aet(ds,'pca_explained_ratio',-.001);
    aet(ds,'pca_explained_ratio',1.001);

function test_map_pca_max_feature_count_exceptions()
    default_max_feature_count=1000;

    nsamples=20;
    for max_feature_count=[NaN, ...
                            round(rand()*10+10), ...
                            default_max_feature_count]
        for delta=(-1:1)
            opt=struct();

            if isnan(max_feature_count)
                nfeatures=default_max_feature_count+delta;
            else
                nfeatures=max_feature_count+delta;
                opt.max_feature_count=max_feature_count;
            end

            data=randn(nsamples,nfeatures);

            expect_exception=delta>0;

            for use_struct=[false,true]
                if use_struct
                    ds=struct();
                    ds.samples=data;
                else
                    ds=data;
                end

                func_handle=@()cosmo_map_pca(ds,opt);

                if expect_exception
                    assertExceptionThrown(func_handle);
                else
                    % should not raise an exception
                    func_handle();
                end
            end
        end
    end




function test_map_pca_basics
    nfeatures=ceil(rand()*10+10);
    nsamples_train=ceil(rand()*10+10)+nfeatures;
    nsamples_test=ceil(rand()*10+10)+nfeatures;

    train_samples=randn(nsamples_train,nfeatures);
    test_samples=randn(nsamples_test,nfeatures);

    for count=[1 ceil(nfeatures/2) nfeatures ceil(rand()*nfeatures)]
        helper_test_map_pca_with_count(train_samples,...
                                                test_samples,count);
    end

    for ratio=[.1 .5 1 rand()]
        helper_test_map_pca_with_ratio(train_samples,...
                                                test_samples,ratio);
    end

    helper_test_map_pca_with_no_arguments(train_samples,...
                                                test_samples);

function helper_test_map_pca_with_ratio(train_samples,...
                                                test_samples,ratio)
    ratio_opt=struct();
    ratio_opt.pca_explained_ratio=ratio;

    % check that ratio and count give the same output
    [unused,param]=cosmo_pca(train_samples);
    count=find(cumsum(param.explained)>=ratio*100,1);
    if isempty(count)
        count=numel(param.explained);
    end

    count_opt=struct();
    count_opt.pca_explained_count=count;

    did_throw=true;
    try
        [samples_ratio,params_ratio]=cosmo_map_pca(train_samples,ratio_opt);
        did_throw=false;
    end

    if did_throw
        cosmo_map_pca(train_samples,count_opt)
        assertExceptionThrown(@()...
                cosmo_map_pca(train_samples,count_opt));
        assertExceptionThrown(@()...
                helper_test_dataset_samples_correspondence(train_samples,...
                                            ratio_opt));

    else
        [samples_count,params_count]=cosmo_map_pca(train_samples,...
                                            count_opt);

        assert_almost_equal(samples_ratio,samples_count);
        assert_almost_equal(params_ratio,params_count);

        % verify that raw samples and dataset structures give the same
        % result
        helper_test_dataset_samples_correspondence(train_samples,...
                                                    ratio_opt);
        helper_test_dataset_samples_correspondence(test_samples,...
                                                'pca_params',params_ratio);
    end

    % verify correct result by delegating to count
    if ~did_throw
        helper_test_map_pca_with_count(train_samples,test_samples,count);
    else
        assertExceptionThrown(@()...
            helper_test_map_pca_with_count(train_samples,test_samples,...
                                            count));
    end


function helper_test_map_pca_with_count(train_samples,test_samples,count)
    assert(numel(count)==1);
    opt=struct();
    opt.pca_explained_count=count;

    % do PCA in two ways; we assume that cosmo_pca works correctly, and
    % compare its result to cosmo_map_pca
    [ys,params]=cosmo_map_pca(train_samples,opt);
    [pca_samples,pca_params]=cosmo_pca(train_samples,count);

    assert_almost_equal(ys,pca_samples);

    % verify when using samples
    helper_test_dataset_samples_correspondence(train_samples, ...
                                    'pca_explained_count',count);

    % verify params
    expected_params=struct();
    expected_params.mu=pca_params.mu;
    expected_params.coef=pca_params.coef;
    nfeatures=size(train_samples,2);
    expected_params.retain=(1:nfeatures)<=count;
    assert_almost_equal(expected_params,params);


    [zs,params2]=cosmo_map_pca(test_samples,'pca_params',params);
    assertEqual(params2,params);

    expected_samples=bsxfun(@minus,test_samples,params.mu)*params.coef;
    assert_almost_equal(zs,expected_samples);

    helper_test_dataset_samples_correspondence(test_samples,...
                                'pca_params',params);


function helper_test_map_pca_with_no_arguments(train_samples,test_samples)
    [ys,params]=cosmo_map_pca(train_samples);

    count=size(train_samples,2);
    [ys_count,params_count]=cosmo_map_pca(train_samples,...
                                    'pca_explained_count',count);
    assert_almost_equal(ys,ys_count);
    assert_almost_equal(params,params_count);

    helper_test_dataset_samples_correspondence(train_samples);
    helper_test_dataset_samples_correspondence(test_samples,...
                                            'pca_params',params);



function helper_test_dataset_samples_correspondence(samples, varargin)
    ds=struct();
    ds.samples=samples;

    [result_ds,param_ds]=cosmo_map_pca(ds,varargin{:});
    [result,param]=cosmo_map_pca(samples,varargin{:});

    % verify that output is the same
    expected_ds=struct();
    expected_ds.samples=result;

    expected_ds.fa.comp=1:size(result,2);
    expected_ds.a.fdim.labels={'comp'};
    expected_ds.a.fdim.values={1:size(samples,2)};

    assert_almost_equal(expected_ds,result_ds);

    % parameters must be identical
    assert_almost_equal(param_ds,param);


function assert_almost_equal(x,y,msg,tol)
    % helper that supports structs recursively
    if nargin<4
        tol=1e-5;
    end
    if nargin<3
        msg='';
    end

    if isstruct(x)
        assertTrue(isstruct(y),[msg ' - x is a struct but y is not']);

        fns=fieldnames(x);
        assertEqual(sort(fns),sort(fieldnames(y)),...
                                    [msg ' - fieldname mismatch'])

        n=numel(fns);
        for k=1:n
            fn=fns{k};

            assert_almost_equal(x.(fn),y.(fn),...
                            sprintf('%s - mismatch for field %s',msg,fn))
        end

    elseif iscell(x) && iscell(y)
        assertEqual(size(x),size(y),[msg ' - cell size mismatch']);
        for k=1:numel(x)
            assert_almost_equal(x{k},y{k},...
                        sprintf(' - cell element %d different',k));
        end
    elseif ischar(x) && ischar(y)
        assertEqual(x,y,sprintf('%s - %s ~= %s',msg,x,y));
    elseif islogical(x) && islogical(y)
        assertEqual(x,y,sprintf('%s - boolean array mismatch',msg));
    elseif isnumeric(x) && isnumeric(y)
        if isempty(x) && isempty(y)
            return;
        end
        assertElementsAlmostEqual(x,y,'absolute',tol,[msg ' - different']);
    else
        error('unsupported data type');
    end


function test_pca_map_ratio_unity()
    sizes=[4,10;...
           10,4;...
           4,4];

    for k=size(sizes,1);
        sz=sizes(k,:);
        samples=randn(sz);
        [xs,params]=cosmo_map_pca(samples,'pca_explained_ratio',1);

        assertEqual(params.retain,true(1,sz(end)-1));
    end