cosmo dim transpose skl

function ds = cosmo_dim_transpose(ds, dim_labels, target_dim, target_pos)
    % move a dataset dimension from samples to features or vice versa
    %
    % ds_tr=cosmo_dim_transpose(ds, dim_labels[, target_dim, target_pos])
    %
    % Inputs:
    %   ds              dataset struct
    %   dim_labels      a single dimension label, or a cell with dimension
    %                   labels. If target_dim is 1 [or 2], then all labels in
    %                   dim_labels must be present in ds.a.fdim[sdim].labels,
    %                   and all labels in dim_labels must be a fieldname of
    %                   ds.fa[sa].
    %   target_dim      (optional) indicates that the dimensions in dim_labels
    %                   must be moved from features to samples (if
    %                   target_dim==1) or from samples to features (if
    %                   target_dim==2). If omitted, it is deduced from
    %                   dim_labels; all labels must then be in the same
    %                   (sample or feature) dimension.
    %   target_pos      (optional) the position which the first label in
    %                   dim_labels must occupy after the transpose. If
    %                   omitted, the value 0 is passed on to
    %                   cosmo_dim_insert; as illustrated in the examples
    %                   below, the labels are then added at the end.
    %
    % Output:
    %   ds_tr           dataset struct where all labels in dim_labels are
    %                   in ds_tr.a.sdim[fdim] (if target_dim is 1 [or 2]), and
    %                   where the fieldnames of ds_tr.sa[fa] are a superset
    %                   of dim_labels.
    %                   A field .fa.transpose_ids [.sa.transpose_ids] is added
    %                   indicating the original feature [sample] id (column
    %                   [row]) that the samples belonged to.
    %
    % Examples:
    %     ds=cosmo_synthetic_dataset('type','timefreq');
    %     % dataset attribute dimensions are
    %     % (<empty> [samples]) x (chan x freq x time [features])
    %     cosmo_disp(ds.a.fdim)
    %     %|| .labels
    %     %||   { 'chan'
    %     %||     'freq'
    %     %||     'time' }
    %     %|| .values
    %     %||   { { 'MEG0111'  'MEG0112'  'MEG0113' }
    %     %||     [ 2         4 ]
    %     %||     [ -0.2 ]                            }
    %     % transpose 'time' from features to samples
    %     ds_tr_time=cosmo_dim_transpose(ds,'time');
    %     % dataset attribute dimensions are (time) x (chan x freq)
    %     cosmo_disp({ds_tr_time.a.sdim,ds_tr_time.a.fdim})
    %     %|| { .labels         .labels
    %     %||     { 'time' }      { 'chan'
    %     %||   .values             'freq' }
    %     %||     { [ -0.2 ] }  .values
    %     %||                     { { 'MEG0111'  'MEG0112'  'MEG0113' }
    %     %||                       [ 2         4 ]                     } }
    %     % using the defaults, chan is moved from features to samples, and
    %     % added at the end of .a.sdim.labels
    %     ds_tr_time_chan=cosmo_dim_transpose(ds_tr_time,'chan');
    %     % dataset attribute dimensions are (time x chan) x (freq)
    %     cosmo_disp({ds_tr_time_chan.a.sdim,ds_tr_time_chan.a.fdim})
    %     %|| { .labels                        .labels
    %     %||     { 'time'  'chan' }             { 'freq' }
    %     %||   .values                        .values
    %     %||     { [ -0.2 ]  { 'MEG0111'        { [ 2         4 ] }
    %     %||                   'MEG0112'
    %     %||                   'MEG0113' } }                        }
    %     % when setting the position explicitly, chan is moved from features to
    %     % samples, and inserted to the first position in .a.sdim.labels
    %     ds_tr_chan_time=cosmo_dim_transpose(ds_tr_time,'chan',1,1);
    %     % dataset attribute dimensions are (chan x time) x (freq)
    %     cosmo_disp({ds_tr_chan_time.a.sdim,ds_tr_chan_time.a.fdim})
    %     %|| { .labels                        .labels
    %     %||     { 'chan'  'time' }             { 'freq' }
    %     %||   .values                        .values
    %     %||     { { 'MEG0111'    [ -0.2 ]      { [ 2         4 ] }
    %     %||         'MEG0112'
    %     %||         'MEG0113' }           }                        }
    %     %
    %     % this moves the time dimension back to the feature dimension.
    %     ds_orig=cosmo_dim_transpose(ds_tr_time,'time');
    %     cosmo_disp(ds_orig.a.fdim)
    %     %|| .labels
    %     %||   { 'chan'
    %     %||     'freq'
    %     %||     'time' }
    %     %|| .values
    %     %||   { { 'MEG0111'  'MEG0112'  'MEG0113' }
    %     %||     [ 2         4 ]
    %     %||     [ -0.2 ]                            }
    %
    %
    % Notes:
    %   - This function is aimed at MEEG datasets (and for fMRI datasets with a
    %     time dimension), so that time can be made a sample dimension
    %
    % #   For CoSMoMVPA's copyright information and license terms,   #
    % #   see the COPYING file distributed with CoSMoMVPA.           #

    % a single label may be passed as a plain string; wrap it in a cell
    if ischar(dim_labels)
        dim_labels = {dim_labels};
    elseif ~iscellstr(dim_labels)
        error('input must be string or cell of strings');
    end

    % fill in the optional arguments
    if nargin < 4
        target_pos = 0;
    end

    if nargin < 3
        target_dim = find_target_dim(ds, dim_labels);
    end

    % 'sa' or 'fa': the attributes on the side the dimension is moved to
    target_attr_name = dim2attr_name(target_dim);

    % the dimension currently holding the labels (1 <-> 2)
    source_dim = 3 - target_dim;

    % one part for each combination of values of the labels to be moved
    parts = cosmo_split(ds, dim_labels, source_dim);

    % in each part, move the attributes to the target side
    move_attr = @(part) copy_attr(part, dim_labels, target_dim);
    parts = cellfun(move_attr, parts, 'UniformOutput', false);

    % put the parts together again, now along the target dimension
    stacked = cosmo_stack(parts, target_dim, 'unique');

    % take the dimension out of the source side, keeping its values
    [ds, unused, removed_values] = cosmo_dim_remove(stacked, dim_labels);

    % transpose both the cell holding the values and each of its elements
    values_tr = removed_values';
    for j = 1:numel(values_tr)
        values_tr{j} = values_tr{j}';
    end

    attr_tr = ds.(target_attr_name);

    % add the dimension on the target side at the requested position
    ds = cosmo_dim_insert(ds, target_dim, target_pos, ...
                          dim_labels, values_tr, attr_tr);

    % sanity check of the result
    cosmo_check_dataset(ds);

function target_dim = find_target_dim(ds, dim_labels)
    % deduce the target dimension from where the labels currently are
    %
    % Inputs:
    %   ds              dataset struct
    %   dim_labels      non-empty cell with dimension labels
    %
    % Output:
    %   target_dim      3 minus the dimension returned by cosmo_dim_find
    %                   for the labels, i.e. the opposite dimension (1 if
    %                   the labels were found in dimension 2, and vice
    %                   versa)
    %
    % An error is raised if dim_labels is empty, or if cosmo_dim_find does
    % not return the same dimension for all labels.

    n_labels = numel(dim_labels);
    if n_labels == 0
        % without this check source_dim would never be assigned, leading to
        % an uninformative 'undefined variable' error below
        error('at least one dimension label must be provided');
    end

    % the first label determines the dimension; all others must agree
    source_dim = cosmo_dim_find(ds, dim_labels{1});

    for j = 2:n_labels
        source_dim_j = cosmo_dim_find(ds, dim_labels{j});

        if source_dim_j ~= source_dim
            error('labels %s and %s do not share the same dimension', ...
                  dim_labels{1}, dim_labels{j});
        end
    end

    target_dim = 3 - source_dim;

function prefix = dim2prefix(dim)
    % returns 's' if dim is 1, or 'f' if dim is 2
    dim_letters = ['s', 'f'];
    prefix = dim_letters(dim);

function attr_name = dim2attr_name(dim)
    % name of the attribute field for a dimension:
    % 'sa' if dim is 1, or 'fa' if dim is 2
    dim_letter = dim2prefix(dim);
    attr_name = [dim_letter 'a'];

function ds = copy_attr(ds, dim_labels, target_dim)
    % move the attributes named in dim_labels between .fa and .sa
    %
    % Inputs:
    %   ds              dataset struct in which, for each label in
    %                   dim_labels, the source attribute has a single
    %                   unique value (this is asserted)
    %   dim_labels      cell with dimension labels
    %   target_dim      1 to move from .fa to .sa, 2 to move from .sa to .fa
    %
    % Output:
    %   ds              dataset struct where each label in dim_labels is
    %                   removed from the source attributes and set, with its
    %                   value repeated along target_dim, in the target
    %                   attributes. The source attributes get a field
    %                   .transpose_ids with values 1:N along the source
    %                   dimension.
    source_dim = 3 - target_dim;
    from_attr = dim2attr_name(source_dim);
    to_attr = dim2attr_name(target_dim);

    % size of an attribute in the target dimension: singleton in the
    % other dimension
    to_shape = ones(1, 2);
    to_shape(target_dim) = size(ds.samples, target_dim);

    n_labels = numel(dim_labels);
    for j = 1:n_labels
        label = dim_labels{j};
        src_values = ds.(from_attr).(label);

        % the values must be non-empty and all identical; the caller is
        % expected to pass the output of cosmo_split, for which this
        % should always hold
        assert(~isempty(src_values));
        first_value = src_values(1);
        assert(all(src_values(:) == first_value));

        ds.(to_attr).(label) = repmat(first_value, to_shape);
    end

    % the labels no longer belong to the source attributes
    ds.(from_attr) = rmfield(ds.(from_attr), dim_labels);

    % store the original position along the source dimension, so that the
    % input can be reconstructed even after rows or columns are permuted
    from_shape = ones(1, 2);
    from_shape(source_dim) = size(ds.samples, source_dim);
    n_ids = max(from_shape);
    ds.(from_attr).transpose_ids = reshape(1:n_ids, from_shape);