function [arr, dim_labels, dim_values]=cosmo_unflatten(ds, dim, varargin)
% unflattens a dataset from 2 to (1+K) dimensions.
%
% [arr, dim_labels, dim_values]=cosmo_unflatten(ds[, dim][, ...])
%
% Inputs:
%   ds                 dataset structure, with fields:
%      .samples        PxQ for P samples and Q features.
%      .a.Xdim.labels  1xK cell with string labels for each dimension,
%                      with X='s' for samples (dim=1) or X='f' for
%                      features (dim=2).
%      .a.Xdim.values  1xK cell, with S_J values (J in 1:K) corresponding
%                      to the labels in each of the K dimensions.
%      .Xa.(label)     for each label in a.Xdim.labels it contains the
%                      sub-indices for the K dimensions. It is required
%                      that for every dimension J in 1:K, all values in
%                      ds.Xa.(a.Xdim.labels{J}) are in the range 1:S_J,
%                      and that every combination across labels is
%                      unique.
%   dim                dimension to be unflattened, either 1 (for samples)
%                      or 2 (for features; default). An empty value
%                      selects the default.
%   'set_missing_to',s value to set missing values to (default: 0)
%   'matrix_labels',m  Allow labels in the cell string m to be matrices
%                      rather than vectors. Currently the only use case is
%                      the 'pos' attribute for MEEG source space data.
%
% Returns:
%   arr                S_1 x ... x S_K x Q array if (dim==1), or
%                      P x S_1 x ... x S_K array if (dim==2).
%                      Positions in the S_1 x ... x S_K grid for which the
%                      dataset has no sample (dim==1) or feature (dim==2)
%                      are set to the 'set_missing_to' value.
%   dim_labels         the value of .a.Xdim.labels
%   dim_values         the value of .a.Xdim.values
%
% Example:
%     % ds is an FMRI dataset with 6 samples, volumes are 3 x 2 x 5 voxels
%     ds=cosmo_synthetic_dataset('size','normal','type','fmri');
%     size(ds.samples)
%     %|| [ 6 30 ]
%     cosmo_disp(ds.a.fdim)
%     %|| .labels
%     %||   { 'i'  'j'  'k' }
%     %|| .values
%     %||   { [ 1 2 3 ]  [ 1 2 ]  [ 1 2 3 4 5 ] }
%     %
%     % unflatten the dataset
%     [unfl,labels,values]=cosmo_unflatten(ds);
%     %
%     % the unflattened dataset is of size 6 x 3 x 2 x 5
%     size(unfl)
%     %|| [ 6 3 2 5 ]
%     cosmo_disp(labels)
%     %|| { 'i'  'j'  'k' }
%     cosmo_disp(values)
%     %|| { [ 1 2 3 ]  [ 1 2 ]  [ 1 2 3 4 5 ] }
%
%     % ds is a small dataset with 2 classes
%     ds=cosmo_synthetic_dataset();
%     %
%     % compute all (2x2) split-half correlation values
%     res=cosmo_correlation_measure(ds,'output','raw',...
%                                           'post_corr_func',[]);
%     cosmo_disp(res)
%     %|| .samples
%     %||   [  0.363
%     %||     -0.404
%     %||     -0.447
%     %||      0.606 ]
%     %|| .sa
%     %||   .half1
%     %||     [ 1
%     %||       2
%     %||       1
%     %||       2 ]
%     %||   .half2
%     %||     [ 1
%     %||       1
%     %||       2
%     %||       2 ]
%     %|| .a
%     %||   .sdim
%     %||     .labels
%     %||       { 'half1'  'half2' }
%     %||     .values
%     %||       { [ 1    [ 1
%     %||           2 ]    2 ] }
%     %
%     % reshape the correlations into a square matrix
%     [unfl,labels,values]=cosmo_unflatten(res,1);
%     %
%     % yields a 2x2x1 matrix (matlab omits the last, singleton dimension)
%     cosmo_disp(unfl)
%     %|| [  0.363    -0.447
%     %||   -0.404     0.606 ]
%     %
%     cosmo_disp(labels)
%     %|| { 'half1'  'half2' }
%     %
%     cosmo_disp(values)
%     %|| { [ 1    [ 1
%     %||     2 ]    2 ] }
%
%
% Notes:
%   - A typical use case is mapping an fMRI or MEEG dataset struct
%     back to a 3D or 4D array.
%   - This function is the inverse of cosmo_flatten.
%   - An error is raised if dim is not a numeric scalar equal to 1 or 2.
%
% See also: cosmo_flatten, cosmo_map2fmri, cosmo_map2meeg
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #

    % features (dim=2) are unflattened unless requested otherwise
    if nargin<2 || isempty(dim)
        dim=2;
    end

    % validate dim before any other work, so that a wrong second argument
    % is reported as such
    if ~(isnumeric(dim) && isscalar(dim))
        error('second argument must be a numeric scalar');
    end

    cosmo_check_dataset(ds);

    defaults=struct();
    defaults.set_missing_to=0;
    defaults.matrix_labels=cell(0);

    % options provided by the caller override the defaults
    opt=cosmo_structjoin(defaults,varargin);

    switch dim
        case 1
            % unflatten samples: required fields must be present
            cosmo_isfield(ds,{'a.sdim','samples','sa'},true);
            do_transpose=true;
            a_dim=ds.a.sdim;
            attr=ds.sa;

        case 2
            % unflatten features: required fields must be present
            cosmo_isfield(ds,{'a.fdim','samples','fa'},true);
            do_transpose=false;
            a_dim=ds.a.fdim;
            attr=ds.fa;

        otherwise
            error('dim must be 1 or 2');
    end

    samples=ds.samples;

    % the helper unflattens the second dimension only; for dim==1 the
    % data and the dimension values are transposed first, and the result
    % is transformed back afterwards
    if do_transpose
        samples=samples';
        a_dim.values=cellfun(@transpose,a_dim.values,...
                                    'UniformOutput',false);
    end

    [arr, dim_labels,dim_values]=unflatten_features(samples, ...
                                                    a_dim, attr, opt);

    if do_transpose
        % move the leading dimension (originally the features) to the end
        arr=shiftdim(arr,1);
        dim_values=cellfun(@transpose,dim_values,'UniformOutput',false);
    end

function [arr, dim_labels, dim_values]=unflatten_features(samples, ...
                                                    a_dim, attr, opt)
% unflattens the second dimension of a samples matrix
%
% Inputs:
%   samples      matrix of which each row is unflattened separately
%   a_dim        struct with fields .labels and .values, each a cell with
%                one element per dimension
%   attr         struct with, for each label in a_dim.labels, a field
%                with the sub-indices for the corresponding dimension
%   opt          struct with fields .set_missing_to (fill value for
%                positions without data) and .matrix_labels (passed on to
%                get_dim_sizes)
%
% Returns:
%   arr          array with the rows of samples stacked along the first
%                dimension, and the remaining dimensions sized according
%                to the number of values in each dimension
%   dim_labels   a_dim.labels
%   dim_values   a_dim.values, as returned by get_dim_sizes
%
% An error is raised if an attribute indexes beyond the number of values
% in its dimension, or if two columns map to the same position.

    nsamples=size(samples,1);
    dim_labels=a_dim.labels;
    dim_values=a_dim.values;

    % number of feature dimensions
    ndim=numel(dim_labels);

    % get sub indices for each feature dimension
    sub_indices=cellfun(@(x)attr.(x), dim_labels, 'UniformOutput', false);

    % get dimension sizes and the dimension values
    [dim_sizes, dim_values]=get_dim_sizes(dim_values, dim_labels, opt);

    % every sub index must fit within the size of its dimension
    max_indices=cellfun(@max,sub_indices);
    too_small_dim=find(max_indices(:)>dim_sizes(:),1);
    if ~isempty(too_small_dim)
        error(['dimension with label %s has %d dimension labels, '...
                'but attribute indexes up to %d'],...
                    dim_labels{too_small_dim}, dim_sizes(too_small_dim),...
                    max_indices(too_small_dim));
    end

    % convert sub indices to linear indices
    % (sub2ind needs at least two dimensions)
    if ndim==1
        lin_indices=sub_indices{1};
    else
        lin_indices=sub2ind(dim_sizes,sub_indices{:});
    end

    % each position may be used at most once; after sorting, equal
    % neighbours reveal a duplicate. The smallest duplicated index is
    % reported, with the first two columns that map to it
    sorted_indices=sort(lin_indices(:));
    first_repeat=find(diff(sorted_indices)==0,1);
    if ~isempty(first_repeat)
        duplicate=sorted_indices(first_repeat);
        two_duplicate_pos=find(lin_indices==duplicate,2);
        error('Duplicate features at #%d and #%d', ...
                    two_duplicate_pos(1), two_duplicate_pos(2));
    end

    % allocate space for output - one cell per sample
    arr_cell=cell(1,nsamples);

    % allocate space in 'ndim'-space for each sample,
    % but with a first singleton dimension as that one
    % is used for the samples
    arr_dim=zeros([1, dim_sizes]);

    % process each sample
    for k=1:nsamples
        % reset all positions to the value for missing data
        arr_dim(:)=opt.set_missing_to;

        % assign to proper location
        arr_dim(lin_indices)=samples(k, :);

        % store result for this sample
        arr_cell{k}=arr_dim;
    end

    % combine all samples along the first dimension
    arr=cat(1, arr_cell{:});

function [dim_sizes, dim_values]=get_dim_sizes(dim_values, dim_labels, opt)
% determines the number of elements in each dimension
%
% Inputs:
%   dim_values   cell with the values for each dimension
%   dim_labels   cell with the label for each dimension; must have the
%                same number of elements as dim_values
%   opt          struct with field .matrix_labels; labels matched by
%                cosmo_match against this field may have a matrix as
%                value, all other labels must have a vector as value
%
% Returns:
%   dim_sizes    1xK vector with the size of each dimension: the number
%                of columns for matrix labels, the number of elements
%                otherwise
%   dim_values   the input dim_values, with each vector value converted
%                to a row vector; matrix values are left unchanged

    ndim=numel(dim_labels);

    if numel(dim_values)~=ndim
        error(['size mismatch between number of dimension values (%d) '...
                'and dimension labels (%d)'],...
                    numel(dim_values), ndim);
    end

    % number of elements in each dimension
    dim_sizes=zeros(1,ndim);

    % go over dimensions
    for dim=1:ndim
        dim_label=dim_labels{dim};
        dim_value=dim_values{dim};

        if cosmo_match({dim_label},opt.matrix_labels)
            % matrix values: each column is one element of the dimension
            dim_size=size(dim_value,2);
        else
            if ~isvector(dim_value)
                error(['Label ''%s'' (dimension %d) must be a vector, '...
                        'because it was not specified as a matrix '...
                        'dimension in the ''matrix_labels'' option'],...
                        dim_label, dim);
            end

            dim_size=numel(dim_value);

            % make it a row vector; reshape (rather than the conjugate
            % transpose operator ') leaves the values themselves untouched
            dim_values{dim}=reshape(dim_value,1,[]);
        end

        dim_sizes(dim)=dim_size;
    end
