function test_suite=test_normalize
% tests for cosmo_normalize
%
% Entry point of this test file. The output test_suite is not assigned
% explicitly in this function body; presumably the initTestSuite call at
% the end sets it (TODO: confirm against the test framework in use).
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #
% The variable test_functions is not referenced again in this function;
% according to the note below, it only has to exist before initTestSuite
% runs. If localfunctions() raises an error, the error is ignored on
% purpose.
try % assignment of 'localfunctions' is necessary in Matlab >= 2016
test_functions=localfunctions();
catch % no problem; early Matlab versions can use initTestSuite fine
end
% must come after the assignment above (see the note on the try line)
initTestSuite;
function test_normalize_basics()
    % Checks cosmo_normalize against hand-computed values on a small 5x3
    % dataset. Covered are the 'demean', 'scale_unit' and 'zscore'
    % methods, estimating parameters on one dataset and applying them to
    % another, inputs without any features, the empty method string, and
    % input containing a NaN value.

    % all numeric comparisons use an absolute tolerance of 1e-3
    aoe=@(x,y)assertElementsAlmostEqual(x,y,'absolute',1e-3);

    ds=struct();
    % columns are 2:2:10, 12:2:20 and 22:2:30
    ds.samples=reshape(1:15,5,3)*2;

    % demean along first dimension
    dsn=cosmo_normalize(ds,'demean',1);
    aoe(dsn.samples, [ -4 -4 -4;...
                       -2 -2 -2;...
                        0  0  0;...
                        2  2  2;...
                        4  4  4 ]);

    % leaving out the dimension must give the same result as dimension 1
    dsn2=cosmo_normalize(ds,'demean');
    aoe(dsn.samples,dsn2.samples);

    % demean along second dimension
    dsn=cosmo_normalize(ds,'demean',2);
    aoe(dsn.samples, [ -10 0 10;...
                       -10 0 10;...
                       -10 0 10;...
                       -10 0 10;...
                       -10 0 10 ]);

    % scale to range [-1,1] along first dimension
    dsn=cosmo_normalize(ds,'scale_unit',1);
    aoe(dsn.samples, [ -1   -1   -1;...
                       -0.5 -0.5 -0.5;...
                        0    0    0;...
                        0.5  0.5  0.5;...
                        1    1    1 ]);

    dsn2=cosmo_normalize(ds,'scale_unit');
    aoe(dsn.samples,dsn2.samples);

    % z-score along first dimension
    dsn=cosmo_normalize(ds,'zscore',1);
    aoe(dsn.samples, [ -1.2649 -1.2649 -1.2649;...
                       -0.6325 -0.6325 -0.6325;...
                        0       0       0;...
                        0.6325  0.6325  0.6325;...
                        1.2649  1.2649  1.2649 ]);

    dsn2=cosmo_normalize(ds,'zscore');
    aoe(dsn.samples,dsn2.samples);

    % z-score along second dimension
    dsn=cosmo_normalize(ds,'zscore',2);
    aoe(dsn.samples, [ -1 0 1;...
                       -1 0 1;...
                       -1 0 1;...
                       -1 0 1;...
                       -1 0 1 ]);

    % use samples 1, 3, and 4 to estimate parameters ('training set'),
    % and apply these to samples 2 and 5
    ds_train=cosmo_slice(ds,[1 3 4]);
    ds_test=cosmo_slice(ds,[2 5]);

    [dsn_train,params]=cosmo_normalize(ds_train,'scale_unit', 1);
    aoe(dsn_train.samples, [ -1      -1      -1;...
                              0.3333  0.3333  0.3333;...
                              1       1       1 ]);

    % the estimated parameters are the per-column extremes of the
    % training samples
    p.method='scale_unit';
    p.dim=1;
    p.min=[ 2 12 22 ];
    p.max=[ 8 18 28 ];
    assertEqual(params,p);

    % apply parameters to test dataset; sample 5 exceeds the training
    % maximum and therefore maps to a value above 1
    dsn_test=cosmo_normalize(ds_test,params);
    aoe(dsn_test.samples,[ -0.3333 -0.3333 -0.3333;...
                            1.6667  1.6667  1.6667 ]);

    % numeric arrays without any columns: both estimating and applying
    % parameters must return an array of the same size
    [tr,params]=cosmo_normalize(zeros(4,0),'zscore');
    te=cosmo_normalize(zeros(2,0),params);
    assertEqual(tr,zeros(4,0));
    assertEqual(te,zeros(2,0));

    [tr,params]=cosmo_normalize(zeros(4,0),'demean');
    te=cosmo_normalize(zeros(2,0),params);
    assertEqual(tr,zeros(4,0));
    assertEqual(te,zeros(2,0));

    [tr,params]=cosmo_normalize(zeros(4,0),'scale_unit');
    te=cosmo_normalize(zeros(2,0),params);
    assertEqual(tr,zeros(4,0));
    assertEqual(te,zeros(2,0));

    % an empty method string must leave the dataset unchanged
    dsn=cosmo_normalize(ds,'');
    assertEqual(dsn,ds);

    % switch warnings off for the NaN case below; the onCleanup object
    % restores the previous warning state when this function exits
    warning_state=cosmo_warning();
    warning_resetter=onCleanup(@()cosmo_warning(warning_state));
    cosmo_warning('off');

    % a single NaN must make its whole column NaN after z-scoring, and
    % must leave the other columns free of NaN
    ds.samples(1,1)=NaN;
    dsn=cosmo_normalize(ds,'zscore');
    assert(all(isnan(dsn.samples(:,1))));
    assert(all(all(~isnan(dsn.samples(:,2:3)))));
function test_normalize_random_data_train
    % Compares cosmo_normalize on random data with a direct computation,
    % for every combination of method ('zscore', 'demean', 'scale_unit')
    % and dimension argument (omitted, 1 or 2). For each combination the
    % parameters are estimated on one random dataset and then applied to
    % a second random dataset of the same size.

    opt_space=struct();
    opt_space.dim={[],1,2};
    opt_space.method={'zscore','demean','scale_unit'};
    all_opts=cosmo_cartprod(opt_space);

    for combi_idx=1:numel(all_opts)
        opt=all_opts{combi_idx};
        method=opt.method;

        % an empty dimension means that the dimension argument is left
        % out of the call; the expected values then use dimension 1
        norm_args={method};
        dim=1;
        if ~isempty(opt.dim)
            dim=opt.dim;
            norm_args=[norm_args {dim}];
        end

        % random size, with more columns than rows
        nrows=ceil(rand()*10+10);
        ncols=ceil(rand()*10+10)+nrows;
        train_samples=randn(nrows,ncols);

        ds=struct();
        ds.samples=train_samples;
        ds.sa.targets=1+mod(cosmo_randperm(nrows),3)';

        [train_ds,train_param]=cosmo_normalize(ds,norm_args{:});

        mu=mean(train_samples,dim);
        sd=std(train_samples,[],dim);

        expected_param=struct();
        expected_param.dim=dim;
        expected_param.method=method;

        % build the expected transformation once, as a function handle
        % that captures the values estimated from the first dataset, so
        % that it can be applied to the second dataset as well
        if strcmp(method,'demean')
            transform=@(x)bsxfun(@minus,x,mu);
            expected_param.mu=mu;
        elseif strcmp(method,'zscore')
            transform=@(x)bsxfun(@rdivide,...
                                 bsxfun(@minus,x,mu),sd);
            expected_param.mu=mu;
            expected_param.sigma=sd;
        elseif strcmp(method,'scale_unit')
            mn=min(train_samples,[],dim);
            mx=max(train_samples,[],dim);
            span=mx-mn;
            transform=@(x)bsxfun(@rdivide,...
                                 bsxfun(@minus,x,mn),span)*2-1;
            expected_param.min=mn;
            expected_param.max=mx;
        else
            assert(false);
        end

        assertElementsAlmostEqual(train_ds.samples,...
                                  transform(train_samples));
        assert_struct_almost_equal(train_param,expected_param);

        % second dataset of the same size: apply the estimated parameters
        test_samples=randn(size(train_samples));
        ds.samples=test_samples;
        ds.sa.targets=1+mod(cosmo_randperm(nrows),3)';

        [test_ds,test_param]=cosmo_normalize(ds,train_param);

        assertElementsAlmostEqual(test_ds.samples,...
                                  transform(test_samples));

        % applying parameters must return those parameters unchanged
        assert_struct_almost_equal(test_param,train_param);
    end
function assert_struct_almost_equal(x,y)
    % Asserts that x and y have the same set of field names and matching
    % field values. A value that is numeric in x is compared with
    % assertElementsAlmostEqual; any other value is compared with
    % assertEqual.
    names=fieldnames(x);

    % the order of the fields is irrelevant
    assertEqual(sort(names),sort(fieldnames(y)));

    for idx=1:numel(names)
        name=names{idx};
        actual=x.(name);
        expected=y.(name);

        if isnumeric(actual)
            assertElementsAlmostEqual(actual,expected);
        else
            assertEqual(actual,expected);
        end
    end
function test_normalize_exceptions()
    % Verifies that cosmo_normalize throws an exception for unsupported
    % method names, for a dimension argument combined with previously
    % estimated parameters, and for a second input of an illegal type.
    % NOTE(review): the empty identifier passed to assertExceptionThrown
    % presumably matches any exception - confirm with the test framework.
    ds=cosmo_synthetic_dataset();

    % each cell holds the arguments passed after the dataset
    bad_method_args={ {'zscore3'},...
                      {'zscore2'},...
                      {'zscore1'},...
                      {'foo'},...
                      {'zscore1',1},...
                      {'zscore1',2} };

    for idx=1:numel(bad_method_args)
        args=bad_method_args{idx};
        assertExceptionThrown(@()cosmo_normalize(ds,args{:}),'');
    end

    % bad dimension
    [unused,params]=cosmo_normalize(ds,'zscore',1);

    bad_params=params;
    bad_params.dim=2;

    % the last entry is an illegal second input (a cell)
    bad_param_args={ {params,2},...
                     {params,2},...
                     {bad_params,1},...
                     {{'foo'}} };

    for idx=1:numel(bad_param_args)
        args=bad_param_args{idx};
        assertExceptionThrown(@()cosmo_normalize(ds,args{:}),'');
    end