cosmo index unique skl

function [cell_indices, unique_values]=cosmo_index_unique(values)
% index unique (combinations of) elements
%
% [cell_indices, unique_values]=cosmo_index_unique(values)
%
% Input:
%   values              either:
%                       - cell with K elements, each of which must be
%                         either a vector with M elements or a cell with
%                         M strings (the j-th entries of all K elements
%                         together are treated as one row); or
%                       - MxK matrix (logical or numeric)
%
% Returns:
%   cell_indices        Ux1 cell, if the input contains U unique
%                       combinations of values (element-wise, i.e. U
%                       unique rows). The j-th element is a vector with
%                       indices in the range 1:M, indicating which rows in
%                       the input share the j-th unique combination
%   unique_values       either:
%                       - 1xK cell, each element with U rows, containing
%                         the unique combinations of values of the input
%                         [if the input is a cell]; or
%                       - UxK matrix, containing the unique rows in the
%                         input [if the input is a matrix]
%
% Examples:
%     [i,u]=cosmo_index_unique({[3 2 2 2 1],[3 2 3 3 3]});
%     cosmo_disp(i);
%     %|| { [ 5 ]
%     %||   [ 2 ]
%     %||   [ 3
%     %||     4 ]
%     %||   [ 1 ] }
%     cosmo_disp(u);
%     %|| { [ 1    [ 3
%     %||     2      2
%     %||     2      3
%     %||     3 ]    3 ] }
%
%     % the same operation in matrix operation (input is transposed)
%     [i,u]=cosmo_index_unique([3 2 2 2 1;3 2 3 3 3]');
%     cosmo_disp(i);
%     %|| { [ 5 ]
%     %||   [ 2 ]
%     %||   [ 3
%     %||     4 ]
%     %||   [ 1 ] }
%     cosmo_disp(u);
%     %|| [ 1         3
%     %||   2         2
%     %||   2         3
%     %||   3         3 ]
%
%     % it also works if (some of the) input contains cell strings
%     [i,u]=cosmo_index_unique({{'ccc','bb','bb','bb','a'},...
%                                 [4 3 4 4 4]});
%     cosmo_disp(i);
%     %|| { [ 5 ]
%     %||   [ 2 ]
%     %||   [ 3
%     %||     4 ]
%     %||   [ 1 ] }
%     cosmo_disp(u);
%     %|| { { 'a'      [ 4
%     %||     'bb'       3
%     %||     'bb'       4
%     %||     'ccc' }    4 ] }
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #

    % replace every column (or cell element) by integer codes, so that
    % each input row becomes a row of integers
    [codes,is_matrix_input]=index_unique_per_value(values);

    % after sorting, rows with identical codes are adjacent
    [sorted_codes,order]=sortrows(codes);

    % a new group starts at the first row and wherever a sorted row
    % differs from the row before it
    is_group_start=[true; any(diff(sorted_codes,1),2)];
    group_start=find(is_group_start);

    % the size of each group is the distance to the next group start
    nrows=size(codes,1);
    group_sizes=diff([group_start;(nrows+1)]);

    % split the sort permutation into one index vector per group
    cell_indices=quick_mat2cell_vec(order,group_sizes);

    if nargout<2
        % unique values were not requested
        return;
    end

    % pick one representative input row for each group
    if isempty(order)
        representative=codes;
    else
        representative=order(group_start);
    end

    unique_values=get_unique_values(values,representative,...
                                            is_matrix_input);

function c=quick_mat2cell_vec(i, cell_sizes)
% Split vector i into consecutive chunks.
%
% Inputs:
%   i             vector to be split
%   cell_sizes    vector with the number of elements for each chunk
%
% Output:
%   c             Nx1 cell (N=numel(cell_sizes)); c{j} contains the
%                 cell_sizes(j) elements of i that follow the elements
%                 already assigned to c{1},...,c{j-1}
%
% This is a lightweight replacement for mat2cell(i,cell_sizes,1).

    count=numel(cell_sizes);
    c=cell(count,1);

    % last and first position in i for every chunk
    stops=cumsum(cell_sizes(:));
    starts=stops-cell_sizes(:)+1;

    for j=1:count
        c{j}=i(starts(j):stops(j));
    end



function unique_values=get_unique_values(values,first_idx,input_is_array)
% Select the rows of the input that represent the unique combinations.
%
% Inputs:
%   values            the original input: a matrix, or a cell with
%                     vectors and/or cells
%   first_idx         row indices, one for each unique combination
%   input_is_array    true if values is a matrix, false if it is a cell
%
% Output:
%   unique_values     values(first_idx,:) for matrix input; otherwise a
%                     1xK cell (K=numel(values)) in which the k-th element
%                     is values{k}, reshaped to a column and then sliced
%                     with first_idx

    if input_is_array
        % matrix input: the selected rows are the result
        unique_values=values(first_idx,:);
        return;
    end

    % cell input: slice every element separately
    nvalues=numel(values);
    unique_values=cell(1,nvalues);
    for j=1:nvalues
        as_column=values{j}(:);

        % cosmo_slice is defined outside this file; presumably it selects
        % the rows indexed by first_idx - TODO confirm
        unique_values{j}=cosmo_slice(as_column,first_idx);
    end


function [idxs,input_is_array]=index_unique_per_value(values)
% Finds the indices of unique elements for each column (matrix input) or
% each element (cell input) in values.
%
% Input:
%   values            either a two-dimensional logical or numeric array
%                     with K columns, or a cell with K elements, each
%                     being one-dimensional
%
% Outputs:
%   idxs              MxK matrix; idxs(:,k) contains, for each of the M
%                     values in the k-th column / element, its index in
%                     the sorted unique values of that column / element.
%                     It is [] if there are no columns / elements or if
%                     an empty column / element is encountered.
%   input_is_array    true if values is a two-dimensional logical or
%                     numeric array, false if it is a cell
%
% Raises an error if:
%   - values is neither such an array nor a cell;
%   - an element is not one-dimensional;
%   - elements do not all have the same number of values.

    input_is_array=(islogical(values) || isnumeric(values)) && ...
                        numel(size(values))==2;
    if input_is_array
        ndim=size(values,2);
    elseif iscell(values)
        ndim=numel(values);
    else
        error('input must be matrix or cell');
    end

    if ndim==0
        % no values, return
        idxs=[];
        return;
    end

    for k=1:ndim
        if input_is_array
            vs=values(:,k);
        else
            vs=values{k};
        end

        if numel(vs)==0
            % no values, return
            idxs=[];
            return;
        end

        % pass the position k so that an error message can report which
        % element is invalid
        idx=unique_indices_from_vector(vs,k);

        % ensure all elements in values have the same size
        nv=numel(idx);
        if k==1
            nv_first=nv;

            % allocate space for output
            idxs=zeros(nv,ndim);
        else
            if nv~=nv_first
                error('element %d has %d values, first has %d',...
                            k,nv,nv_first);
            end
        end

        % store indices
        idxs(:,k)=idx;
    end

function idx=unique_indices_from_vector(vs,k)
% Returns for each value in a vector its index in the unique values.
%
% Inputs:
%   vs      one-dimensional array or cell
%   k       position of vs in the input of the caller; only used for
%           reporting in the error message
%
% Output:
%   idx     third output of unique(vs), i.e. indices so that indexing
%           the unique values of vs with idx gives back the values in vs
%
% Raises an error if vs is not one-dimensional.

    if ~is_1d(vs)
        error('element %d is not one-dimensional',k);
    end

    % the first two outputs of unique are not needed; a dummy variable
    % is used instead of '~' to stay compatible with older interpreters
    [unused,unused,idx]=unique(vs);


function tf=is_1d(x)
% Returns true if at most one dimension of x has more than one element
% (this includes scalars and empty arrays), false otherwise.

    dims=size(x);
    n_large_dims=nnz(dims>1);
    tf=n_large_dims<2;