cosmo anova feature selector skl

function selected_indices = cosmo_anova_feature_selector(dataset, how_many)
    % find the features that show the most variance between classes
    %
    % selected_indices=cosmo_anova_feature_selector(dataset, how_many)
    %
    % Inputs:
    %  dataset          struct with .samples and .sa.targets
    %  how_many         non-negative numeric scalar. A value between 0 and
    %                   1 keeps round(how_many*nfeatures) features (i.e.
    %                   about how_many*100% of the features); an integer
    %                   value >=1 keeps exactly how_many features
    %
    % Output:
    %  selected_indices   feature ids in dataset with most variance between
    %                     classes, ordered from the highest to the lowest
    %                     F value.
    %
    % Example:
    %     ds=cosmo_synthetic_dataset();
    %     disp(size(ds.samples))
    %     %|| [ 6 6 ]
    %     cosmo_anova_feature_selector(ds,.45) % find best ~45% of features
    %     %|| [ 2 4 5 ]
    %     cosmo_anova_feature_selector(ds,4) % find best 4 features
    %     %|| [ 2 4 5 3 ]
    %
    % Notes:
    %  - an error is raised if how_many is not a non-negative numeric
    %    scalar, if how_many>=1 is not an integer, if how_many exceeds the
    %    number of features in the dataset, or if any of the selected
    %    features has a NaN F value.
    %
    % #   For CoSMoMVPA's copyright information and license terms,   #
    % #   see the COPYING file distributed with CoSMoMVPA.           #

    % reject malformed requests up front; without this check a negative
    % or NaN value would fall through to the fraction branch below and
    % silently give an empty (or invalid) selection
    if ~(isnumeric(how_many) && isscalar(how_many) && ...
         ~isnan(how_many) && how_many >= 0)
        error('how_many must be a non-negative numeric scalar');
    end

    % one F value per feature
    fstat = cosmo_stat(dataset, 'F');
    fvalues = fstat.samples;

    % ensure that nan values are not selected by setting them to
    % an impossible low F value, so that they sort last
    fvalues(isnan(fvalues)) = -1;

    % sort by F values, largest first
    % (a named dummy output is used instead of '~' to avoid relying on
    % newer syntax)
    [unused, idxs] = sort(fvalues, 'descend');

    % determine how many features to select
    nfeatures = size(dataset.samples, 2);

    if how_many >= 1
        % absolute count: must be a whole number that fits the dataset
        if round(how_many) ~= how_many
            error('how_many>=1 is not an integer');
        elseif how_many > nfeatures
            error('dataset has %d features, cannot return %d', ...
                  nfeatures, how_many);
        end
        nkeep = how_many;
    else
        % fraction of all features, rounded to the nearest count
        nkeep = round(how_many * nfeatures);
    end

    selected_indices = idxs(1:nkeep);

    % throw an error if any selected feature had a NaN F value; such
    % features carry the negative sentinel value assigned above
    if any(fvalues(selected_indices) < 0)
        idx = find(fvalues(selected_indices) < 0, 1);
        error('Feature %d has NaN Fscore', selected_indices(idx));
    end