function test_suite = test_normalize
% tests for cosmo_normalize
%
% Entry point of this test file; the individual tests are the local
% functions defined below in this file.
%
% Returns:
%   test_suite    not assigned explicitly in this function; presumably
%                 set by the initTestSuite call below (TODO confirm)
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #
try % assignment of 'localfunctions' is necessary in Matlab >= 2016
% the variable is not referenced again in this function; presumably
% initTestSuite picks it up from this workspace (TODO confirm)
test_functions = localfunctions();
catch % no problem; early Matlab versions can use initTestSuite fine
% deliberately empty: a failing localfunctions() call is ignored
end
initTestSuite;
function test_normalize_basics()
    % Check cosmo_normalize against hand-computed results on a small
    % 5x3 dataset, covering the 'demean', 'scale_unit' and 'zscore'
    % methods, estimation and re-application of parameters, empty input,
    % the empty method string, and NaN input.

    % all numeric comparisons use an absolute tolerance of 1e-3
    aoe = @(x, y)assertElementsAlmostEqual(x, y, 'absolute', 1e-3);

    ds = struct();
    ds.samples = reshape(1:15, 5, 3) * 2;

    % demean along first dimension
    dsn = cosmo_normalize(ds, 'demean', 1);
    aoe(dsn.samples, repmat([-4; -2; 0; 2; 4], 1, 3));

    % omitting the dimension must give the same result as dimension 1
    dsn2 = cosmo_normalize(ds, 'demean');
    aoe(dsn.samples, dsn2.samples);

    % demean along second dimension
    dsn = cosmo_normalize(ds, 'demean', 2);
    aoe(dsn.samples, repmat([-10 0 10], 5, 1));

    % scale to range [-1,1] along first dimension
    dsn = cosmo_normalize(ds, 'scale_unit', 1);
    aoe(dsn.samples, repmat([-1; -0.5; 0; 0.5; 1], 1, 3));

    dsn2 = cosmo_normalize(ds, 'scale_unit');
    aoe(dsn.samples, dsn2.samples);

    % z-score along first dimension
    dsn = cosmo_normalize(ds, 'zscore', 1);
    aoe(dsn.samples, repmat([-1.2649; -0.6325; 0; 0.6325; 1.2649], 1, 3));

    dsn2 = cosmo_normalize(ds, 'zscore');
    aoe(dsn.samples, dsn2.samples);

    % z-score along second dimension
    dsn = cosmo_normalize(ds, 'zscore', 2);
    aoe(dsn.samples, repmat([-1 0 1], 5, 1));

    % use samples 1, 3, and 4 to estimate parameters ('training set'),
    % and apply these to samples 2 and 5
    ds_train = cosmo_slice(ds, [1 3 4]);
    ds_test = cosmo_slice(ds, [2 5]);

    [dsn_train, params] = cosmo_normalize(ds_train, 'scale_unit', 1);
    aoe(dsn_train.samples, repmat([-1; 0.3333; 1], 1, 3));

    % the estimated parameters must match exactly
    p = struct();
    p.method = 'scale_unit';
    p.dim = 1;
    p.min = [2 12 22];
    p.max = [8 18 28];
    assertEqual(params, p);

    % apply parameters to test dataset; values outside the training
    % range map outside [-1,1]
    dsn_test = cosmo_normalize(ds_test, params);
    aoe(dsn_test.samples, repmat([-0.3333; 1.6667], 1, 3));

    % input without features: training and applying parameters must
    % return empty arrays of unchanged size, for every method
    method_names = {'zscore', 'demean', 'scale_unit'};
    for k = 1:numel(method_names)
        [tr, empty_params] = cosmo_normalize(zeros(4, 0), method_names{k});
        te = cosmo_normalize(zeros(2, 0), empty_params);
        assertEqual(tr, zeros(4, 0));
        assertEqual(te, zeros(2, 0));
    end

    % an empty method string leaves the dataset unchanged
    dsn = cosmo_normalize(ds, '');
    assertEqual(dsn, ds);

    % switch warnings off for the NaN check below; the onCleanup object
    % restores the previous warning state when this function exits
    warning_state = cosmo_warning();
    warning_resetter = onCleanup(@()cosmo_warning(warning_state));
    cosmo_warning('off');

    % a single NaN makes its whole column NaN after z-scoring, and
    % leaves the other columns free of NaN
    ds.samples(1, 1) = NaN;
    dsn = cosmo_normalize(ds, 'zscore');
    assert(all(isnan(dsn.samples(:, 1))));
    assert(all(all(~isnan(dsn.samples(:, 2:3)))));
function test_normalize_random_data_train
    % For each combination of method and dimension, estimate
    % normalization parameters on random data and compare samples and
    % parameters with values computed directly here; then apply the
    % estimated parameters to a second random dataset of the same size.
    opt_grid = struct();
    opt_grid.dim = {[], 1, 2};
    opt_grid.method = {'zscore', 'demean', 'scale_unit'};
    combis = cosmo_cartprod(opt_grid);

    for k = 1:numel(combis)
        combi = combis{k};
        method = combi.method;

        % an empty dim means that no dimension argument is passed; the
        % expected values are then computed along dimension 1
        if isempty(combi.dim)
            dim = 1;
            args = {method};
        else
            dim = combi.dim;
            args = {method, dim};
        end

        % random dataset with more features than samples
        nsamples = ceil(rand() * 10 + 10);
        nfeatures = ceil(rand() * 10 + 10) + nsamples;
        train_samples = randn(nsamples, nfeatures);

        ds = struct();
        ds.samples = train_samples;
        ds.sa.targets = 1 + mod(cosmo_randperm(nsamples), 3)';

        [res_ds, res_param] = cosmo_normalize(ds, args{:});

        mu = mean(train_samples, dim);
        sd = std(train_samples, [], dim);

        expected_param = struct();
        expected_param.dim = dim;
        expected_param.method = method;

        % 'normalizer' applies the statistics of the training data to
        % any sample matrix, so it serves both datasets below
        switch method
            case 'demean'
                normalizer = @(x) bsxfun(@minus, x, mu);
                expected_param.mu = mu;

            case 'zscore'
                normalizer = @(x) bsxfun(@rdivide, ...
                                         bsxfun(@minus, x, mu), sd);
                expected_param.mu = mu;
                expected_param.sigma = sd;

            case 'scale_unit'
                mn = min(train_samples, [], dim);
                mx = max(train_samples, [], dim);
                delta = mx - mn;
                normalizer = @(x) bsxfun(@rdivide, ...
                                         bsxfun(@minus, x, mn), delta) * 2 - 1;
                expected_param.min = mn;
                expected_param.max = mx;

            otherwise
                assert(false);
        end

        assertElementsAlmostEqual(res_ds.samples, ...
                                  normalizer(train_samples));
        assert_struct_almost_equal(res_param, expected_param);

        % new dataset, apply parameters
        test_samples = randn(size(train_samples));
        ds.samples = test_samples;
        ds.sa.targets = 1 + mod(cosmo_randperm(nsamples), 3)';

        [res2_ds, res2_param] = cosmo_normalize(ds, res_param);

        assertElementsAlmostEqual(res2_ds.samples, ...
                                  normalizer(test_samples));

        % applying parameters must return them unchanged
        assert_struct_almost_equal(res2_param, res_param);
    end
function assert_struct_almost_equal(x, y)
    % Assert that structs x and y have the same set of field names and
    % matching field values. A field that is numeric in x is compared
    % with assertElementsAlmostEqual; any other field with assertEqual.
    names_x = fieldnames(x);

    % field order is irrelevant, hence the sort on both sides
    assertEqual(sort(names_x), sort(fieldnames(y)));

    for idx = 1:numel(names_x)
        name = names_x{idx};
        value_x = x.(name);
        value_y = y.(name);

        if isnumeric(value_x)
            assertElementsAlmostEqual(value_x, value_y);
        else
            assertEqual(value_x, value_y);
        end
    end
function test_normalize_exceptions()
    % Verify that cosmo_normalize throws an exception for a range of
    % invalid argument combinations.
    ds = cosmo_synthetic_dataset();

    % helper: assert that cosmo_normalize(ds, ...) throws
    aet = @(varargin)assertExceptionThrown(@() ...
                                           cosmo_normalize(ds, varargin{:}), '');

    % method names that must be rejected, with and without a dimension
    bad_method_args = {{'zscore3'}, ...
                       {'zscore2'}, ...
                       {'zscore1'}, ...
                       {'foo'}, ...
                       {'zscore1', 1}, ...
                       {'zscore1', 2}};
    for k = 1:numel(bad_method_args)
        bad_args = bad_method_args{k};
        aet(bad_args{:});
    end

    % bad dimension
    [unused, params] = cosmo_normalize(ds, 'zscore', 1);
    % NOTE(review): the same call is asserted twice; the second one may
    % have been meant to use a different argument - confirm
    aet(params, 2);
    aet(params, 2);

    % parameters estimated along dimension 1, relabelled as dimension 2
    bad_params = params;
    bad_params.dim = 2;
    aet(bad_params, 1);

    % illegal second input
    aet({'foo'});