function selected_indices=cosmo_anova_feature_selector(dataset, how_many)
% find the features that show the most variance between classes
%
% selected_indices=cosmo_anova_feature_selector(dataset, how_many)
%
% Inputs:
% dataset struct with .samples and .sa.targets
% how_many value between 0 and 1 keeps how_many*100% features;
% values >=1 keeps how_many features
%
% Output:
% selected_indices feature ids in dataset with most variance between
% classes.
%
% Example:
% ds=cosmo_synthetic_dataset();
% disp(size(ds.samples))
% %|| [ 6 6 ]
% cosmo_anova_feature_selector(ds,.45) % find best ~45% of features
% %|| [ 2 4 5 ]
% cosmo_anova_feature_selector(ds,4) % find best 4 features
% %|| [ 2 4 5 3 ]
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #
fstat=cosmo_stat(dataset,'F');
fvalues=fstat.samples;
% ensure that nan values are not selected by setting them to
% an impossible low F value
fvalues(isnan(fvalues))=-1;
% sort by F values, largest first
[unused, idxs]=sort(fvalues,'descend');
% determine features to select
nfeatures=size(dataset.samples,2);
if how_many>=1
if round(how_many)~=how_many
error('how_many>=1 is not an integer');
elseif how_many>nfeatures
error('dataset has %d features, cannot return %d',...
nfeatures,how_many);
end
nkeep=how_many;
else
nkeep=round(how_many*nfeatures);
end
selected_indices=idxs(1:nkeep);
% throw an error if any indices with NaN F values
if any(fvalues(selected_indices)<0)
idx=find(fvalues(selected_indices)<0,1);
error('Feature %d has NaN Fscore', selected_indices(idx));
end