cosmo slice

function ds=cosmo_slice(ds, to_select, dim, type_or_check)
% Slice a dataset by samples (the default) or features
%
% sliced_ds=cosmo_slice(ds, elements_to_select[, dim][, check|'struct'])
%
% Inputs:
%   ds                    One of:
%                         - dataset struct to be sliced, with PxQ field
%                           .samples and optionally fields .fa, .sa and .a.
%                         - PxQ cell
%                         - PxQ logical or numeric array
%   elements_to_select    either a binary mask or a list of indices of
%                         the samples (if dim==1) or features (if dim==2)
%                         to select. If a binary mask then the number of
%                         elements must match the size of ds in the
%                         dim-th dimension.
%   dim                   Slicing dimension: along samples (dim==1) or
%                         features (dim==2). (default: 1).
%   check                 Boolean indicating whether, if ds is a dataset
%                         struct, it is checked for proper structure
%                         before slicing. (default: true).
%   'struct'              If provided and ds is a struct, then
%                         all fields of ds, which are assumed to be cells
%                         or arrays, are sliced.
%
% Output:
%   sliced_ds             - If ds is a cell or array then sliced_ds is
%                           the result of slicing ds along the dim-th
%                           dimension. The result is of size NxQ (if
%                           dim==1) or PxN (if dim==2), where N is the
%                           number of non-zero values in
%                           elements_to_select.
%                         - If ds is a dataset struct then
%                           sliced_ds.samples is the result of slicing
%                           ds.samples.
%                           If present, fields .sa (if dim==1) or
%                           .fa (dim==2) are sliced as well.
%                         - when ds is a struct and the 'struct' option was
%                           given, then all fields in ds are sliced.
%
% Examples:
%     % make a simple dataset
%     ds=struct();
%     ds.samples=reshape(1:12,4,3); % 4 samples, 3 features
%     % sample attributes
%     ds.sa.chunks=[1 1 2 2]';
%     ds.sa.targets=[1 2 1 2]';
%     % feature attributes
%     ds.fa.i=[3 8 13];
%     ds.fa.roi={'vt','loc','v1'};
%     % dataset attributes
%     ds.a.note='an example';
%     % display dataset
%     cosmo_disp(ds);
%     %|| .samples
%     %||   [ 1         5         9
%     %||     2         6        10
%     %||     3         7        11
%     %||     4         8        12 ]
%     %|| .sa
%     %||   .chunks
%     %||     [ 1
%     %||       1
%     %||       2
%     %||       2 ]
%     %||   .targets
%     %||     [ 1
%     %||       2
%     %||       1
%     %||       2 ]
%     %|| .fa
%     %||   .i
%     %||     [ 3         8        13 ]
%     %||   .roi
%     %||     { 'vt'  'loc'  'v1' }
%     %|| .a
%     %||   .note
%     %||     'an example'
%     %
%     % (snippet) select samples (row) in a dataset
%     % ds is a dataset struct
%     sample_ids=[3 2];
%     % select third and second sample (in that order)
%     sliced_ds=cosmo_slice(ds,sample_ids,1);
%     %
%     cosmo_disp(sliced_ds);
%     %|| .samples
%     %||   [ 3         7        11
%     %||     2         6        10 ]
%     %|| .sa
%     %||   .chunks
%     %||     [ 2
%     %||       1 ]
%     %||   .targets
%     %||     [ 1
%     %||       2 ]
%     %|| .fa
%     %||   .i
%     %||     [ 3         8        13 ]
%     %||   .roi
%     %||     { 'vt'  'loc'  'v1' }
%     %|| .a
%     %||   .note
%     %||     'an example'
%     %
%     % select third and second feature (in that order)
%     sliced_ds=cosmo_slice(ds, [3 2], 2);
%     cosmo_disp(sliced_ds);
%     %|| .samples
%     %||   [  9         5
%     %||     10         6
%     %||     11         7
%     %||     12         8 ]
%     %|| .sa
%     %||   .chunks
%     %||     [ 1
%     %||       1
%     %||       2
%     %||       2 ]
%     %||   .targets
%     %||     [ 1
%     %||       2
%     %||       1
%     %||       2 ]
%     %|| .fa
%     %||   .i
%     %||     [ 13         8 ]
%     %||   .roi
%     %||     { 'v1'  'loc' }
%     %|| .a
%     %||   .note
%     %||     'an example'
%     %
%     % using a logical mask, select features with odd value for .i
%     msk=mod(ds.fa.i,2)==1;
%     disp(msk)
%     %|| [1 0 1]
%     sliced_ds=cosmo_slice(ds, msk, 2);
%     cosmo_disp(sliced_ds);
%     %|| .samples
%     %||   [ 1         9
%     %||     2        10
%     %||     3        11
%     %||     4        12 ]
%     %|| .sa
%     %||   .chunks
%     %||     [ 1
%     %||       1
%     %||       2
%     %||       2 ]
%     %||   .targets
%     %||     [ 1
%     %||       2
%     %||       1
%     %||       2 ]
%     %|| .fa
%     %||   .i
%     %||     [ 3        13 ]
%     %||   .roi
%     %||     { 'vt'  'v1' }
%     %|| .a
%     %||   .note
%     %||     'an example'
%
%     % slice all fields in a struct
%     s=struct();
%     s.a_field=[1 2 3; 4 5 6];
%     s.another_field={'this','is','fun'};
%     cosmo_disp(s);
%     %|| .a_field
%     %||   [ 1         2         3
%     %||     4         5         6 ]
%     %|| .another_field
%     %||   { 'this'  'is'  'fun' }
%     %
%     % select first, third, third, and second column (dim=2)
%     t=cosmo_slice(s, [1 3 3 2], 2, 'struct');
%     cosmo_disp(t);
%     %|| .a_field
%     %||   [ 1         3         3         2
%     %||     4         6         6         5 ]
%     %|| .another_field
%     %||   { 'this'  'fun'  'fun'  'is' }
%
%
% Notes:
%   - check=false may be preferred for slice-intensive operations such
%     as when used in searchlights
%   - this function does not support arrays with more than two dimensions.
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #

    % fill in defaults for optional arguments that are absent or empty
    if nargin<4 || isempty(type_or_check)
        type_or_check=true;
    end

    if nargin<3 || isempty(dim)
        dim=1;
    end

    % cells and plain arrays are sliced directly
    if iscell(ds) || isnumeric(ds) || islogical(ds)
        ds=slice_array(ds, to_select, dim, type_or_check);
        return;
    end

    if ~isstruct(ds)
        error('Illegal input: expected cell, array or struct');
    end

    % generic struct: every field is sliced. The 'struct' char array is
    % handed on as the check flag; as a non-empty array without zeros it
    % counts as true in an if-statement, so size checks are performed
    if strcmp(type_or_check,'struct')
        ds=slice_struct(ds, to_select, dim, type_or_check);
        return;
    end

    % from here on ds is treated as a dataset struct
    if ~isfield(ds,'samples')
        error(['Expected dataset struct. To slice ordinary '...
                'structs use "struct" as last argument']);
    end

    if type_or_check
        % verify the dataset structure before touching it
        cosmo_check_dataset(ds);
    end

    % remember the size before slicing, so that the attributes can be
    % verified against the original number of samples or features
    size_along_dim=size(ds.samples,dim);

    ds.samples=slice_array(ds.samples,to_select,dim,type_or_check);

    % sample attributes go with dim==1, feature attributes with dim==2
    attr_names={'sa','fa'};
    attr_name=attr_names{dim};

    if isfield(ds, attr_name)
        ds.(attr_name)=slice_struct(ds.(attr_name),to_select,...
                                    dim,type_or_check,size_along_dim);
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % helper functions
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    function y=slice_struct(x, to_select, dim, do_check, expected_size)
    % Slice every field of a struct along one dimension
    %
    % Inputs:
    %   x               struct of which each field holds a cell or array
    %   to_select       indices or logical mask of elements to keep
    %   dim             dimension to slice along (1 or 2)
    %   do_check        passed on unchanged to slice_array for each field
    %   expected_size   optional; size that every field must have along
    %                   dimension dim. If omitted, the size of the first
    %                   field is used as the reference for the others.
    %
    % Output:
    %   y               struct with the same fieldnames as x, each field
    %                   sliced by slice_array
    %
    % An error is raised if a field has a size along dimension dim that
    % differs from the expected size.
        if nargin<5, expected_size=NaN; end

        y=struct();
        fns=fieldnames(x);
        for k=1:numel(fns)
            fn=fns{k};
            v=x.(fn);

            v_size=size(v,dim);

            % ensure all input sizes are the same
            if isnan(expected_size)
                % no reference yet: the first field sets it
                expected_size=v_size;
            elseif v_size~=expected_size
                % argument order follows the message: first the expected
                % size, then the size that was actually found
                error(['Size mismatch for %s: expected %d but found %d',...
                        ' elements in dimension %d'],...
                                fn, expected_size, v_size, dim);
            end

            y.(fn)=slice_array(v, to_select, dim, do_check);
        end


    function y=slice_array(x, to_select, dim, do_check)
    % Slice a cell or array along its rows or columns
    %
    % Inputs:
    %   x           cell or array to be sliced
    %   to_select   indices or logical mask of rows (dim==1) or columns
    %               (dim==2) to keep
    %   dim         dimension to slice along; must be 1 or 2
    %   do_check    if true, dim is validated and check_size is called
    %               on the inputs before slicing
    %
    % Output:
    %   y           x(to_select,:) if dim==1, or x(:,to_select) if dim==2
    %
    % An error is raised if dim is not 1 or 2.
        if do_check
            % dim must be validated before check_size, because check_size
            % passes dim to size() and uses the result in a scalar
            % comparison; an invalid dim would otherwise fail there with
            % an unrelated or misleading message
            if ~isnumeric(dim) || ~isscalar(dim) || ~(dim==1 || dim==2)
                error('dim must be 1 or 2');
            end

            check_size(x, to_select, dim);
        end

        if dim==1
            y=x(to_select,:);
        elseif dim==2
            y=x(:,to_select);
        else
            error('dim must be 1 or 2');
        end


    function check_size(x, to_select, dim)
    % Verify that to_select can be used to slice x along dimension dim
    %
    % An error is raised, in this order, if:
    %   - to_select is a logical mask of which the number of elements
    %     differs from the size of x along dimension dim
    %   - x does not have exactly two dimensions
    %   - to_select has more than one non-singleton dimension
        if islogical(to_select)
            % stricter than plain indexing: a logical mask must have
            % exactly as many elements as x has along dimension dim
            n_required=size(x, dim);
            n_in_mask=numel(to_select);

            if n_required~=n_in_mask
                error('Logical mask should have %d elements, found %d', ...
                        n_required, n_in_mask);
            end
        end

        if ndims(x)~=2
            error('Only 2D arrays are allowed');
        end

        % count the dimensions of to_select along which it extends
        n_nonsingleton=nnz(size(to_select)>1);
        if n_nonsingleton>1
            error('elements to select should be in vector');
        end