cosmo flatten

function ds=cosmo_flatten(arr, dim_labels, dim_values, dim, varargin)
% flattens an arbitrary array to a dataset structure
%
% ds=cosmo_flatten(arr, dim_labels, dim_values, dim[, ...])
%
% Inputs:
%   arr                S_1 x ... x S_K x Q input array if (dim==1), or
%                      P x S_1 x ... x S_K input array if (dim==2)
%   dim_labels         1xK cell containing labels for each of the K
%                      dimensions that are flattened (i.e. all dimensions
%                      but the last one if dim==1, and all dimensions but
%                      the first one if dim==2).
%   dim_values         1xK cell with S_J values (J in 1:K) corresponding to
%                      the labels in each of the K dimensions.
%   dim                dimension along which to flatten, either 1 (samples)
%                      or 2 (features; default)
%   'matrix_labels',m  Allow labels in the cell string m to be matrices
%                      rather than vectors. Currently the only use case is
%                      the 'pos' attribute for MEEG source space data.
%
% Output:
%   ds                 dataset structure, with fields:
%      .samples        PxQ data for P samples and Q features, where
%                      P=prod(S_*) if dim==1 and Q=prod(S_*) if dim==2
%      .a.Xdim.labels  1xK cell with the values in dim_labels (X=='s' if
%                      dim==1, and 'f' if dim==2).
%      .a.Xdim.values  1xK cell with the values in dim_values; the M-th
%                      element has S_M values along dimension dim.
%      .Xa.(label)     for each label in a.Xdim.labels it contains the
%                      sub-indices for the K dimensions. It is ensured
%                      that for every dimension J in 1:K, all values in
%                      ds.Xa.(a.Xdim.labels{J}) are in the range 1:S_J.
%
% Examples:
%     % typical usage: flatten features in 2x3x5 array, 1 sample
%     data=reshape(1:30, [1 2,3,5]);
%     ds=cosmo_flatten(data,{'i','j','k'},{1:2,1:3,{'a','b','c','d','e'}});
%     cosmo_disp(ds)
%     %|| .samples
%     %||   [ 1         2         3  ...  28        29        30 ]@1x30
%     %|| .fa
%     %||   .i
%     %||     [ 1 2 1  ...  2 1 2 ]@1x30
%     %||   .j
%     %||     [ 1 1 2  ...  2 3 3 ]@1x30
%     %||   .k
%     %||     [ 1 1 1  ...  5 5 5 ]@1x30
%     %|| .a
%     %||   .fdim
%     %||     .labels
%     %||       { 'i'  'j'  'k' }
%     %||     .values
%     %||       { [ 1 2 ]  [ 1 2 3 ]  { 'a'  'b'  'c'  'd'  'e' } }
%
%     % flatten samples in 1x1x2x3 array, 5 features
%     data=reshape(1:30, [1,1,2,3,5]);
%     ds=cosmo_flatten(data,{'i','j','k','m'},{1,'a',(1:2)',(1:3)'},1);
%     cosmo_disp(ds);
%     %|| .samples
%     %||   [ 1         7        13        19        25
%     %||     2         8        14        20        26
%     %||     3         9        15        21        27
%     %||     4        10        16        22        28
%     %||     5        11        17        23        29
%     %||     6        12        18        24        30 ]
%     %|| .sa
%     %||   .i
%     %||     [ 1
%     %||       1
%     %||       1
%     %||       1
%     %||       1
%     %||       1 ]
%     %||   .j
%     %||     [ 1
%     %||       1
%     %||       1
%     %||       1
%     %||       1
%     %||       1 ]
%     %||   .k
%     %||     [ 1
%     %||       2
%     %||       1
%     %||       2
%     %||       1
%     %||       2 ]
%     %||   .m
%     %||     [ 1
%     %||       1
%     %||       2
%     %||       2
%     %||       3
%     %||       3 ]
%     %|| .a
%     %||   .sdim
%     %||     .labels
%     %||       { 'i'  'j'  'k'  'm' }
%     %||     .values
%     %||       { [ 1 ]  'a'  [ 1    [ 1
%     %||                       2 ]    2
%     %||                              3 ] }
%
%
% Notes:
%   - Intended use is for flattening fMRI or MEEG datasets
%   - This function is the inverse of cosmo_unflatten.
%   - Complex-valued input is supported for both values of dim; the
%     values in .samples are not conjugated.
%
% See also: cosmo_unflatten, cosmo_fmri_dataset, cosmo_meeg_dataset
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #

    defaults.matrix_labels=cell(0);
    opt=cosmo_structjoin(defaults,varargin{:});

    if nargin<4, dim=2; end

    switch dim
        case 1
            do_transpose=true;
            attr_name='sa';
            dim_name='sdim';
        case 2
            do_transpose=false;
            attr_name='fa';
            dim_name='fdim';
        otherwise
            error('illegal dim: must be 1 or 2');
    end

    if do_transpose
        % Flattening along the sample dimension is implemented by moving
        % the last (feature) dimension to the front, flattening as if
        % along the feature dimension, and transposing the result back.
        ndim=numel(dim_labels);
        nfeatures=size(arr,ndim+1);
        if nfeatures==1
            % the feature dimension is a singleton; prepend a singleton
            % dimension explicitly
            arr=reshape(arr,[1 size(arr)]);
        else
            arr=shiftdim(arr,ndim);
        end
        dim_values=cellfun(@transpose,dim_values,'UniformOutput',false);
    end

    [samples,dim_values,attr]=flatten_features(arr, dim_labels, ...
                                            dim_values, opt);

    if do_transpose
        % use the non-conjugate transpose (.') so that complex-valued
        % data is not conjugated when switching samples and features back
        samples=samples.';
        attr=transpose_attr(attr);
        dim_values=cellfun(@transpose,dim_values,'UniformOutput',false);
    end

    ds=struct();
    ds.samples=samples;
    ds.(attr_name)=attr;
    ds.a.(dim_name).labels=dim_labels;
    ds.a.(dim_name).values=dim_values;

function attr=transpose_attr(attr)
% helper function that replaces the value of every field in the struct
% attr by its transpose (using the ' operator)
    names=fieldnames(attr);
    nfields=numel(names);

    for j=1:nfields
        name=names{j};
        attr.(name)=attr.(name)';
    end

function [samples,dim_values,attr]=flatten_features(arr, dim_labels, ...
                                                        dim_values, opt)
    % helper function that collapses all dimensions of arr but the first
    % one into a single dimension, and computes for each label a row
    % vector with the sub-index of every element in the collapsed dimension

    nlabels=numel(dim_labels);
    nvalue_sets=numel(dim_values);
    arr_ndim=numel(size(arr));

    if nlabels ~= nvalue_sets
        error('expected %d dimensions, found %d',nlabels,nvalue_sets);
    end

    if arr_ndim > nlabels+1
        error('Array has %d dimensions, expected <= %d',...
                                        arr_ndim,nlabels+1);
    end

    % size of each dimension after the first one; this also verifies the
    % values against the size of arr
    % (supports the case that arr is of size [...,1])
    [dim_sizes,dim_values]=get_dim_sizes(arr,dim_labels,dim_values,opt);

    attr=struct();

    for k=1:nlabels
        count=size(dim_values{k},2);

        % put the indices 1:count along the k-th dimension, with all
        % other dimensions being singleton
        if nlabels==1
            % with a single dimension a row vector is used as is
            idxs=1:count;
        else
            shape=ones(1,nlabels);
            shape(k)=count;
            idxs=reshape(1:count,shape);
        end

        % tile the indices along every dimension but the k-th one, so
        % that the result has size dim_sizes
        tile_count=dim_sizes;
        tile_count(k)=1;
        tiled=repmat(idxs, tile_count(:)');

        % the indices are stored as a row vector
        attr.(dim_labels{k})=reshape(tiled, 1, []);
    end

    % the first dimension is kept, the others are collapsed
    samples=reshape(arr, size(arr,1), prod(dim_sizes));


function [dim_sizes, dim_values]=get_dim_sizes(arr,dim_labels,dim_values,opt)
% helper function to determine and verify the size of each dimension
%
% Inputs:
%   arr           array of which dimension J+1 corresponds to the J-th
%                 element in dim_labels and dim_values
%   dim_labels    cell with K labels
%   dim_values    cell with K elements containing the values for each
%                 dimension
%   opt           struct with field .matrix_labels, containing the labels
%                 for which the values are allowed to be a matrix
%
% Outputs:
%   dim_sizes     1xK vector with the number of values in each dimension
%   dim_values    as the input, but with the values for every label not
%                 in opt.matrix_labels converted to a row vector
%
% An error is raised if the values for a label not in opt.matrix_labels
% are not a vector, or if the number of values for a label does not match
% the size of arr along the corresponding dimension.

    ndim=numel(dim_values);
    dim_sizes=zeros(1,ndim);

    for dim=1:ndim
        dim_label=dim_labels{dim};
        dim_value=dim_values{dim};

        if cosmo_match({dim_label},opt.matrix_labels)
            % for matrix values each column counts as a single value
            dim_size=size(dim_value,2);
        else
            if ~isvector(dim_value)
                error(['Label ''%s'' (dimension %d) must be a vector, '...
                        'because it was not specified as a matrix '...
                        'dimension in the ''matrix_labels'' option'],...
                        dim_label, dim);
            end
            dim_size=numel(dim_value);
            dim_values{dim}=dim_value(:)'; % make it a row vector
        end

        % the first dimension of arr is not described by the labels,
        % hence the offset of one
        expected_size=size(arr,dim+1);

        if dim_size ~= expected_size
            error(['Label ''%s'' (dimension %d) has %d values, ',...
                        'expected %d based on the array input'],...
                    dim_label, dim, dim_size, expected_size);
        end

        dim_sizes(dim)=dim_size;
    end