test normalize

function test_suite = test_normalize
    % tests for cosmo_normalize
    %
    % #   For CoSMoMVPA's copyright information and license terms,   #
    % #   see the COPYING file distributed with CoSMoMVPA.           #
    try % assignment of 'localfunctions' is necessary in Matlab >= 2016
        test_functions = localfunctions();
    catch % no problem; early Matlab versions can use initTestSuite fine
    end
    initTestSuite;

function test_normalize_basics()

    aoe = @(x, y)assertElementsAlmostEqual(x, y, 'absolute', 1e-3);

    ds = struct();
    ds.samples = reshape(1:15, 5, 3) * 2;

    % demean along first dimension
    dsn = cosmo_normalize(ds, 'demean', 1);
    aoe(dsn.samples,   [-4        -4        -4; ...
                        -2        -2        -2; ...
                        0         0         0; ...
                        2         2         2; ...
                        4         4         4]);

    dsn2 = cosmo_normalize(ds, 'demean');
    aoe(dsn.samples, dsn2.samples);

    % demean along second dimension
    dsn = cosmo_normalize(ds, 'demean', 2);
    aoe(dsn.samples, [-10         0        10; ...
                      -10         0        10; ...
                      -10         0        10; ...
                      -10         0        10; ...
                      -10         0        10]);

    %
    % scale to range [-1,1] alnog first dimension
    dsn = cosmo_normalize(ds, 'scale_unit', 1);
    aoe(dsn.samples, [-1        -1        -1; ...
                      -0.5      -0.5      -0.5; ...
                      0         0         0; ...
                      0.5      0.5       0.5; ...
                      1         1         1]);
    dsn2 = cosmo_normalize(ds, 'scale_unit');
    aoe(dsn.samples, dsn2.samples);

    % z-score along first dimension
    dsn = cosmo_normalize(ds, 'zscore', 1);
    aoe(dsn.samples, [-1.2649     -1.2649     -1.2649; ...
                      -0.6325    -0.6325    -0.632; ...
                      0         0         0; ...
                      0.6325     0.6325     0.6325; ...
                      1.2649      1.2649      1.2649]);
    dsn2 = cosmo_normalize(ds, 'zscore');
    aoe(dsn.samples, dsn2.samples);

    % z-score along second dimension
    dsn = cosmo_normalize(ds, 'zscore', 2);
    aoe(dsn.samples, [-1         0         1; ...
                      -1         0         1; ...
                      -1         0         1; ...
                      -1         0         1; ...
                      -1         0         1]);
    %
    % use samples 1, 3, and 4 to estimate parameters ('training set'),
    % and apply these to samples 2 and 5
    ds_train = cosmo_slice(ds, [1 3 4]);
    ds_test = cosmo_slice(ds, [2 5]);
    [dsn_train, params] = cosmo_normalize(ds_train, 'scale_unit', 1);
    aoe(dsn_train.samples, [-1        -1        -1; ...
                            0.3333     0.3333     0.3333; ...
                            1         1         1]);
    p.method = 'scale_unit';
    p.dim = 1;
    p.min = [2        12        22];
    p.max = [8        18        28];
    assertEqual(params, p);
    %
    % apply parameters to test dataset
    dsn_test = cosmo_normalize(ds_test, params);
    aoe(dsn_test.samples, [-0.3333    -0.33333    -0.33333; ...
                           1.6667      1.6667    1.6667]);

    [tr, params] = cosmo_normalize(zeros(4, 0), 'zscore');
    te = cosmo_normalize(zeros(2, 0), params);
    assertEqual(tr, zeros(4, 0));
    assertEqual(te, zeros(2, 0));

    [tr, params] = cosmo_normalize(zeros(4, 0), 'demean');
    te = cosmo_normalize(zeros(2, 0), params);
    assertEqual(tr, zeros(4, 0));
    assertEqual(te, zeros(2, 0));

    [tr, params] = cosmo_normalize(zeros(4, 0), 'scale_unit');
    te = cosmo_normalize(zeros(2, 0), params);
    assertEqual(tr, zeros(4, 0));
    assertEqual(te, zeros(2, 0));

    dsn = cosmo_normalize(ds, '');
    assertEqual(dsn, ds);

    warning_state = cosmo_warning();
    warning_resetter = onCleanup(@()cosmo_warning(warning_state));
    cosmo_warning('off');

    ds.samples(1, 1) = NaN;
    dsn = cosmo_normalize(ds, 'zscore');
    assert(all(isnan(dsn.samples(:, 1))));
    assert(all(all(~isnan(dsn.samples(:, 2:3)))));

function test_normalize_random_data_train

    dim_opt = struct();
    dim_opt.dim = {[], 1, 2};
    dim_opt.method = {'zscore', 'demean', 'scale_unit'};

    combis = cosmo_cartprod(dim_opt);
    for k = 1:numel(combis)
        opt = combis{k};

        method = opt.method;
        args = {method};
        if isempty(opt.dim)
            dim = 1;
        else
            dim = opt.dim;
            args{end + 1} = dim;
        end

        nsamples = ceil(rand() * 10 + 10);
        nfeatures = ceil(rand() * 10 + 10) + nsamples;

        samples = randn(nsamples, nfeatures);

        ds = struct();
        ds.samples = samples;
        ds.sa.targets = 1 + mod(cosmo_randperm(nsamples), 3)';

        [res_ds, res_param] = cosmo_normalize(ds, args{:});

        mu = mean(samples, dim);
        sd = std(samples, [], dim);

        expected_param = struct();
        expected_param.dim = dim;
        expected_param.method = method;

        switch method
            case 'demean'
                expected_samples = bsxfun(@minus, samples, mu);
                expected_param.mu = mu;

            case 'zscore'
                expected_samples = bsxfun(@rdivide, ...
                                          bsxfun(@minus, samples, mu), sd);
                expected_param.mu = mu;
                expected_param.sigma = sd;

            case 'scale_unit'
                mn = min(samples, [], dim);
                mx = max(samples, [], dim);
                delta = mx - mn;

                expected_samples = bsxfun(@rdivide, ...
                                          bsxfun(@minus, samples, mn), delta) * 2 - 1;

                expected_param.min = mn;
                expected_param.max = mx;

            otherwise
                assert(false);

        end

        assertElementsAlmostEqual(res_ds.samples, expected_samples);
        assert_struct_almost_equal(res_param, expected_param);

        % new dataset, apply parameters
        samples = randn(size(samples));
        ds.samples = samples;
        ds.sa.targets = 1 + mod(cosmo_randperm(nsamples), 3)';

        [res2_ds, res2_param] = cosmo_normalize(ds, res_param);

        switch method
            case 'demean'
                expected_samples = bsxfun(@minus, samples, mu);

            case 'zscore'
                expected_samples = bsxfun(@rdivide, ...
                                          bsxfun(@minus, samples, mu), sd);

            case 'scale_unit'
                expected_samples = bsxfun(@rdivide, ...
                                          bsxfun(@minus, samples, mn), delta) * 2 - 1;

            otherwise
                assert(false);
        end

        assertElementsAlmostEqual(res2_ds.samples, expected_samples);
        assert_struct_almost_equal(res2_param, res_param);
    end

function assert_struct_almost_equal(x, y)
    keys = fieldnames(x);
    assertEqual(sort(keys), sort(fieldnames(y)));

    for k = 1:numel(keys)
        key = keys{k};
        v = x.(key);
        w = y.(key);

        if isnumeric(v)
            func = @assertElementsAlmostEqual;
        else
            func = @assertEqual;
        end

        func(v, w);
    end

function test_normalize_exceptions()
    ds = cosmo_synthetic_dataset();
    aet = @(varargin)assertExceptionThrown(@() ...
                                           cosmo_normalize(ds, varargin{:}), '');

    aet('zscore3');
    aet('zscore2');
    aet('zscore1');
    aet('foo');
    aet('zscore1', 1);
    aet('zscore1', 2);

    % bad dimension
    [unused, params] = cosmo_normalize(ds, 'zscore', 1);
    aet(params, 2);

    aet(params, 2);
    bad_params = params;
    bad_params.dim = 2;
    aet(bad_params, 1);

    % illegal second input
    aet({'foo'});