function ds=cosmo_flatten(arr, dim_labels, dim_values, dim, varargin)
% flattens an arbitrary array to a dataset structure
%
% ds=cosmo_flatten(arr, dim_labels, dim_values, dim[, ...])
%
% Inputs:
%   arr                S_1 x ... x S_K x Q input array if (dim==1), or
%                      P x S_1 x ... x S_K input array if (dim==2)
%   dim_labels         1xK cell containing labels for each dimension but
%                      the last one (if dim==1) or the first one (if
%                      dim==2).
%   dim_values         1xK cell with S_J values (J in 1:K) corresponding to
%                      the labels in each of the K dimensions.
%   dim                dimension along which to flatten, either 1 (samples)
%                      or 2 (features; default). If omitted or empty ([])
%                      the default is used, so that options can be
%                      provided without specifying dim explicitly.
%   'matrix_labels',m  Allow labels in the cell string m to be matrices
%                      rather than vectors. Currently the only use case is
%                      the 'pos' attribute for MEEG source space data.
%
% Output:
%   ds                 dataset structure, with fields:
%      .samples        PxQ data for P samples and Q features, where
%                      P=prod(S_*) if dim==1 and Q=prod(S_*) if dim==2
%      .a.Xdim.labels  the labels in dim_labels (X=='s' if dim==1, and
%                      'f' if dim==2).
%      .a.Xdim.values  1xK cell with the values in dim_values; the M-th
%                      element has S_M values. Vector values are stored
%                      as column vectors if dim==1 and as row vectors if
%                      dim==2.
%      .Xa.(label)     for each label in a.Xdim.labels it contains the
%                      sub-indices for the K dimensions. It is ensured
%                      that for every dimension J in 1:K, all values in
%                      ds.Xa.(a.Xdim.labels{J}) are in the range 1:S_J.
%
% Examples:
%     % typical usage: flatten features in 2x3x5 array, 1 sample
%     data=reshape(1:30, [1 2,3,5]);
%     ds=cosmo_flatten(data,{'i','j','k'},{1:2,1:3,{'a','b','c','d','e'}});
%     cosmo_disp(ds)
%     %|| .samples
%     %||   [ 1 2 3 ... 28 29 30 ]@1x30
%     %|| .fa
%     %||   .i
%     %||     [ 1 2 1 ... 2 1 2 ]@1x30
%     %||   .j
%     %||     [ 1 1 2 ... 2 3 3 ]@1x30
%     %||   .k
%     %||     [ 1 1 1 ... 5 5 5 ]@1x30
%     %|| .a
%     %||   .fdim
%     %||     .labels
%     %||       { 'i' 'j' 'k' }
%     %||     .values
%     %||       { [ 1 2 ] [ 1 2 3 ] { 'a' 'b' 'c' 'd' 'e' } }
%
%     % flatten samples in 1x1x2x3 array, 5 features
%     data=reshape(1:30, [1,1,2,3,5]);
%     ds=cosmo_flatten(data,{'i','j','k','m'},{1,'a',(1:2)',(1:3)'},1);
%     cosmo_disp(ds);
%     %|| .samples
%     %||   [ 1 7 13 19 25
%     %||     2 8 14 20 26
%     %||     3 9 15 21 27
%     %||     4 10 16 22 28
%     %||     5 11 17 23 29
%     %||     6 12 18 24 30 ]
%     %|| .sa
%     %||   .i
%     %||     [ 1
%     %||       1
%     %||       1
%     %||       1
%     %||       1
%     %||       1 ]
%     %||   .j
%     %||     [ 1
%     %||       1
%     %||       1
%     %||       1
%     %||       1
%     %||       1 ]
%     %||   .k
%     %||     [ 1
%     %||       2
%     %||       1
%     %||       2
%     %||       1
%     %||       2 ]
%     %||   .m
%     %||     [ 1
%     %||       1
%     %||       2
%     %||       2
%     %||       3
%     %||       3 ]
%     %|| .a
%     %||   .sdim
%     %||     .labels
%     %||       { 'i' 'j' 'k' 'm' }
%     %||     .values
%     %||       { [ 1 ] 'a' [ 1 [ 1
%     %||                     2 ] 2
%     %||                         3 ] }
%
%
% Notes:
%   - Intended use is for flattening fMRI or MEEG datasets
%   - This function is the inverse of cosmo_unflatten.
%
% See also: cosmo_unflatten, cosmo_fmri_dataset, cosmo_meeg_dataset
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #

    defaults.matrix_labels=cell(0);
    opt=cosmo_structjoin(defaults,varargin{:});

    % an omitted or empty dim selects the default (flatten features);
    % accepting [] allows options to be passed without hard-coding dim
    if nargin<4 || isempty(dim)
        dim=2;
    end

    switch dim
        case 1
            do_transpose=true;
            attr_name='sa';
            dim_name='sdim';
        case 2
            do_transpose=false;
            attr_name='fa';
            dim_name='fdim';
        otherwise
            error('illegal dim: must be 1 or 2');
    end

    if do_transpose
        % switch samples and features, so that the dimension that is not
        % flattened becomes the first one (as flatten_features expects)
        ndim=numel(dim_labels);
        nfeatures=size(arr,ndim+1);
        if nfeatures==1
            % with a single feature the last dimension is a singleton,
            % so a singleton dimension is prepended instead of shifting
            arr=reshape(arr,[1 size(arr)]);
        else
            arr=shiftdim(arr,ndim);
        end
        dim_values=cellfun(@transpose,dim_values,'UniformOutput',false);
    end

    [samples,dim_values,attr]=flatten_features(arr, dim_labels, ...
                                                dim_values, opt);

    if do_transpose
        % undo the switch of samples and features
        samples=samples';
        attr=transpose_attr(attr);
        dim_values=cellfun(@transpose,dim_values,'UniformOutput',false);
    end

    ds=struct();
    ds.samples=samples;
    ds.(attr_name)=attr;
    ds.a.(dim_name).labels=dim_labels;
    ds.a.(dim_name).values=dim_values;

function attr=transpose_attr(attr)
% transposes the value of every field in a struct
%
% Input:
%   attr        struct with values that can be transposed
%
% Output:
%   attr        struct with the same fields, each value transposed
%
    % fieldnames returns an Nx1 cell; after transposing it, the for-loop
    % visits one 1x1 cell (holding a single field name) per iteration
    for name_cell=fieldnames(attr)'
        name=name_cell{1};
        attr.(name)=attr.(name)';
    end

function [samples,dim_values,attr]=flatten_features(arr, dim_labels, ...
                                                        dim_values, opt)
% flattens all dimensions of an array except for the first one
%
% Inputs:
%   arr          array of which the first dimension is kept and the
%                other dimensions are flattened
%   dim_labels   cell with a label for each flattened dimension
%   dim_values   cell with the values for each flattened dimension
%   opt          options, passed on to get_dim_sizes
%
% Outputs:
%   samples      size(arr,1) x prod(dim_sizes) matrix with data from arr
%   dim_values   dimension values as returned by get_dim_sizes
%   attr         struct with for each label a row vector with the
%                sub-indices in the corresponding dimension

    n_labels=numel(dim_labels);
    n_values=numel(dim_values);
    n_arr_dim=numel(size(arr));

    if n_labels ~= n_values
        error('expected %d dimensions, found %d',n_labels,n_values);
    end

    if n_arr_dim>(n_labels+1)
        error('Array has %d dimensions, expected <= %d',...
                    n_arr_dim,n_labels+1);
    end

    % number of values in each flattened dimension; using the values
    % rather than size(arr) supports arrays of size [...,1]
    [dim_sizes,dim_values]=get_dim_sizes(arr,dim_labels,dim_values,opt);

    attr=struct();
    for j=1:n_labels
        count=size(dim_values{j},2);

        % put the indices 1:count along the j-th dimension, with all
        % other dimensions being singleton
        if n_labels==1
            % reshape requires at least two dimensions
            oriented=1:count;
        else
            shape=ones(1,n_labels);
            shape(j)=count;
            oriented=reshape(1:count,shape);
        end

        % tile along all other dimensions; the j-th dimension already
        % has the proper size and is thus repeated only once
        tiles=dim_sizes;
        tiles(j)=1;
        tiled=repmat(oriented,tiles(:)');

        % indices are stored as a row vector
        attr.(dim_labels{j})=reshape(tiled,1,[]);
    end

    % first dimension is kept, all others are collapsed into one
    samples=reshape(arr,size(arr,1),prod(dim_sizes));

function [dim_sizes, dim_values]=get_dim_sizes(arr,dim_labels,dim_values,opt)
% determines and verifies the number of values in each dimension
%
% Inputs:
%   arr          array of which the (J+1)-th dimension corresponds to
%                the J-th label
%   dim_labels   cell with a label for each dimension
%   dim_values   cell with the values for each dimension
%   opt          struct with field .matrix_labels, containing the labels
%                for which the values can be a matrix; for such a label
%                the number of values is the number of columns
%
% Outputs:
%   dim_sizes    row vector with the number of values in each dimension
%   dim_values   the input dim_values, where each element that is not
%                for a matrix label is reshaped to a row vector
%
% An error is raised if values for a non-matrix label are not a vector,
% or if the number of values does not match the size of arr.

    ndim=numel(dim_values);
    dim_sizes=zeros(1,ndim);

    for dim=1:ndim
        dim_label=dim_labels{dim};
        dim_value=dim_values{dim};

        if cosmo_match({dim_label},opt.matrix_labels)
            % matrix values have one column for each element
            dim_size=size(dim_value,2);
        else
            if ~isvector(dim_value)
                % the message refers to the 'matrix_labels' option, which
                % is the name under which the option is actually read
                error(['Label ''%s'' (dimension %d) must be a vector, '...
                            'because it was not specified as a matrix '...
                            'dimension in the ''matrix_labels'' option'],...
                            dim_label, dim);
            end
            dim_size=numel(dim_value);
            dim_values{dim}=dim_value(:)'; % make it a row vector
        end

        % the first dimension of arr is not associated with a label,
        % hence the offset by one
        if dim_size ~= size(arr,dim+1)
            error(['Label ''%s'' (dimension %d) has %d values, ',...
                        'expected %d based on the array input'],...
                        dim_label, dim, dim_size, size(arr,dim+1));
        end

        dim_sizes(dim)=dim_size;
    end