function [ds_useful,msk]=cosmo_remove_useless_data(ds, dim, type)
% remove 'useless' (constant and/or non-finite) samples or features
%
% [ds_useful,msk]=cosmo_remove_useless_data(ds[, dim][, type])
%
% Inputs:
%   ds          dataset struct with a field .samples, or a numeric matrix
%   dim         optional dimension along which data is assessed:
%               dim=1: along samples (useful features are kept)
%               dim=2: along features (useful samples are kept)
%               default: 1
%   type        optional criterion for usefulness, one of:
%               'variable'  : keep data that is not constant and that
%                             contains no NaN
%               'finite'    : keep data that is finite everywhere (i.e.
%                             contains no NaN or Inf)
%               'all'       : keep data that is both variable and finite
%               default: 'all'
%
% Outputs:
%   ds_useful   input sliced so that data that is useless along the
%               dim-th dimension is removed. If there is only a single
%               row (dim=1) or column (dim=2), an element is considered
%               as not constant unless it is NaN.
%   msk         logical mask of the elements that were kept; a row vector
%               over features if dim=1, a column vector over samples if
%               dim=2
%
% Examples:
%     ds=cosmo_synthetic_dataset('nchunks',2);
%     %
%     % make some data elements useless
%     ds.samples(1,1)=NaN; % non-finite
%     ds.samples(2,3)=Inf; % non-finite
%     ds.samples(3,:)=7; % sample 3 is constant across features (dim=2)
%     ds.samples(:,4)=7; % feature 4 is constant across samples (dim=1)
%     %
%     cosmo_disp(ds.samples);
%     %|| [ NaN -1.05 -0.262 7 -0.209 0.844
%     %|| 0.584 0.915 Inf 7 2.39 1.86
%     %|| 7 7 7 7 7 7
%     %|| -0.518 1.84 0.482 7 1.39 0.502 ]
%     %
%     % remove all features that are useless
%     ds_useful=cosmo_remove_useless_data(ds);
%     cosmo_disp(ds_useful.samples);
%     %|| [ -1.05 -0.209 0.844
%     %|| 0.915 2.39 1.86
%     %|| 7 7 7
%     %|| 1.84 1.39 0.502 ]
%     %
%     % remove all features that are constant, and get the logical mask
%     % of the kept features
%     [ds_variable,msk]=cosmo_remove_useless_data(ds,1,'variable');
%     cosmo_disp(ds_variable.samples);
%     %|| [ -1.05 -0.262 -0.209 0.844
%     %|| 0.915 Inf 2.39 1.86
%     %|| 7 7 7 7
%     %|| 1.84 0.482 1.39 0.502 ]
%     cosmo_disp(msk)
%     %|| [ false true true false true true ]
%     %
%     % remove all features that are not finite
%     ds_finite=cosmo_remove_useless_data(ds,1,'finite');
%     cosmo_disp(ds_finite.samples);
%     %|| [ -1.05 7 -0.209 0.844
%     %|| 0.915 7 2.39 1.86
%     %|| 7 7 7 7
%     %|| 1.84 7 1.39 0.502 ]
%     %
%     % remove all samples that are useless
%     ds_finite_features=cosmo_remove_useless_data(ds,2);
%     cosmo_disp(ds_finite_features.samples);
%     %|| [ -0.518 1.84 0.482 7 1.39 0.502 ]
%     %
%     % illustrate that this function also works on an array directly
%     samples_finite_features=cosmo_remove_useless_data(ds.samples,2);
%     cosmo_disp(samples_finite_features);
%     %|| [ -0.518 1.84 0.482 7 1.39 0.502 ]
%
% Notes:
%   - by default, this function removes useless features
%   - constant and/or non-finite features are considered 'useless'
%     because they do not help in discriminating between conditions of
%     interest
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.            #

    % fall back to defaults for optional arguments that are absent or empty
    if nargin<2 || isempty(dim)
        dim=1;
    end

    if nargin<3 || isempty(type)
        type='all';
    end

    check_inputs(dim,type);

    % numeric matrix, taken from ds.samples if ds is a struct
    data=get_data(ds);

    % compute which elements along the other dimension are to be kept
    switch type
        case 'all'
            msk=finite(data, dim) & variable(data, dim);

        case 'variable'
            msk=variable(data, dim);

        case 'finite'
            msk=finite(data, dim);

        otherwise
            error('illegal type %s', type);
    end

    % usefulness was assessed along dim, so the mask indexes the other
    % dimension (1 <-> 2)
    ds_useful=cosmo_slice(ds, msk, 3-dim);
function tf=finite(d, dim)
% Mask of vectors along dimension dim that contain only finite values
%
% Inputs:
%   d       numeric matrix
%   dim     dimension (1 or 2) along which values are inspected
% Output:
%   tf      logical vector (collapsed along dim) that is true where no
%           element along dim is NaN or Inf
    has_non_finite=any(~isfinite(d),dim);
    tf=~has_non_finite;
function tf=variable(d, dim)
% Mask of vectors along dimension dim that are not constant
%
% Inputs:
%   d       numeric matrix
%   dim     dimension (1 or 2) along which values are compared
% Output:
%   tf      logical vector (collapsed along dim) that is true where the
%           values along dim contain no NaN and are not all equal to the
%           first value along dim.
%
% Notes:
%   - any vector that contains a NaN is never marked as variable
%   - if d has fewer than two elements along dim (a single row/column, or
%     none at all), constancy cannot be assessed; the result then only
%     reflects the absence of NaN. In particular, input that is empty
%     along dim gives an all-true mask instead of raising an indexing
%     error.

    % NaN ~= x is always true, so NaNs would otherwise count as variation
    tf=~any(isnan(d),dim);

    if size(d,dim)>1
        % the reference slice is only taken here, where it is guaranteed
        % to exist
        switch dim
            case 1
                d_first=d(1,:);
            case 2
                d_first=d(:,1);
        end

        % variable if at least one element differs from the first one
        differs_from_first=bsxfun(@ne, d_first, d);
        tf=tf & any(differs_from_first, dim);
    end
function data=get_data(ds)
% Get the numeric matrix to be assessed from a dataset struct or an array
%
% Input:
%   ds      struct with a field .samples, or the data array itself
% Output:
%   data    ds.samples if ds is a struct, otherwise ds itself
%
% An error is raised if the data is not numeric or if it has more than
% two dimensions.

    % by default the input itself is the data ...
    data=ds;

    % ... unless it is a struct, in which case .samples is used
    if isstruct(ds)
        % presumably raises an error if the field is missing (third
        % argument set to true) - see cosmo_isfield to confirm
        cosmo_isfield(ds,'samples',true);
        data=ds.samples;
    end

    if ~isnumeric(data)
        error('illegal input: expected numerical data');
    end

    if ndims(data)>2
        error('illegal input: expected data in matrix');
    end
function check_inputs(dim,type)
% Validate the optional arguments of the main function
%
% Inputs:
%   dim     must be a numeric scalar equal to 1 or 2
%   type    must be a character array; whether its value is supported is
%           not checked here
%
% An error is raised if either requirement is not met.

    % short-circuiting ensures the comparisons only see numeric scalars
    if ~isscalar(dim) || ~isnumeric(dim) || (dim~=1 && dim~=2)
        error('second argument must be 1 or 2');
    end

    type_is_string=ischar(type);
    if ~type_is_string
        error('third argument must be a string');
    end