cosmo meeg dataset skl

function ds=cosmo_meeg_dataset(filename, varargin)
% Returns a dataset structure based on MEEG data
%
% ds=cosmo_meeg_dataset(filename, varargin)
%
% Inputs:
%   filename          filename of MEEG data to be loaded. Currently
%                     supported are files with extensions:
%                       .mat :        FieldTrip time-locked or
%                                     time-frequency  data at  either the
%                                     sensor or source level.
%                       .txt :        exported EEGLab with time-locked
%                                     data.
%                       .daterp       time-locked               }
%                       .icaerp       ICA time-locked           } EEGLab
%                       .dattimef     time-freq                 }
%                       .icatimef     ICA time-freq             }
%                       .datitc       inter-trial coherence     }
%                       .icaitc       ICA inter-trial coherence }
%                       .datersp      ERSP data                 }
%                       .icaersp      ICA ERSP data             }
%                     Alternatively it can be a FieldTrip or EEGLab struct
%                     with time-locked or time-frequency data
%   'targets', t      Px1 targets for P samples; these will be stored in
%                     the output as ds.sa.targets (optional)
%   'chunks', c       Px1 chunks for P samples; these will be stored in the
%                     the output as ds.sa.chunks (optional)
%   'data_field', f   - For FieldTrip MEEG source dataset with multiple
%                       data fields (such as 'pow' and 'mom'), this sets
%                       which data is returned. (only for source data)
%                     - For EEGLAB 'ersp' data
%                        f='ersp'       the original data is returned
%                                       (without baseline correction),
%                                       with data for each channel,
%                                       frequency and time point.
%                                       Based on datasets generated by
%                                       EEGLAB that were inspected when
%                                       writing this function, it seems
%                                       that this data represents raw power
%                                       values.
%                        f='erspbase'   the baseline is returned, with data
%                                       for each channel and frequency
%                                       (but not time point)
%                        Note: this function does currently not support
%                              returning baseline-corrected data.
%   'trials', idx     Mx1 array with indices of trials to load (optional).
%                     If not provided then all trials are loaded. The
%                     output has a .samples field with the number of rows
%                     equal to numel(idx).
%
% Returns:
%   ds                dataset struct with the following fields
%     .samples        PxQ for P samples and Q features.
%     .sa.targets     Px1 sample targets (if provided)
%     .sa.chunks      Px1 sample chunks (if provided)
%     .a
%       .meeg
%         .sample_field   name of sample field. One of 'fourierspctrm',
%                         'powspctrm', or 'trial'.
%         .samples_type   'timelock' or 'timefreq'.
%         .samples_label  Usually 'rpt'; or the first field of .dimord
%                         for FieldTrip data
%       .dim
%         .labels     1xS cell struct with labels for the feature
%                     dimensions of the input. Usually this is
%                     {'chan','time'} or {'chan','freq','time'}.
%         .values     1xS cell struct with values associated with .labels.
%                     If the K-th value has N_K values, this means that
%                     the feature dimension .labels{K} takes the
%                     values in .values{K}. For example, if
%                     .labels{1}=='chan', then .values{1} contains the
%                     channel labels.
%     .fa
%       .{D}          if D==a.fdim.labels{K} is the label for the K-th
%                     feature dimension, then .{D} contains the
%                     indices referencing a.fdim.values. Thus, all values in
%                     .{D} are in the range 1:N_K if a.fdim.values{K} has
%                     N_K values, and the J-th feature has dimension value
%                     .dim.values{K}(.{D}(J)) in the K-th dimension.
%
% Notes:
%  - The resulting dataset can be mapped back to MEEG format using
%    cosmo_map2meeg
%  - if the input contains data from a single sample (such as an average)
%    the .sample_field is set to .trial, and mapping back to MEEG format
%    adds a singleton dimension to the .trial data output field.
%  - For single-subject MVPA of single trials using data preprocessed with
%    FieldTrip, consider setting, depending on the data type:
%       * timelock (ft_timelockanalysis): cfg.keeptrials = 'yes'
%       * timefreq (ft_timefreqanalysis): cfg.keeptrials = 'yes'
%       * source   (ft_sourceanalysis)  : cfg.keeptrials = 'yes' *and*
%                                                   cfg.rawtrials = 'yes'
%  - Most MVPA applications require that .sa.targets (experimental
%    condition of each sample) and .sa.chunks (partitioning of the samples
%    in independent sets) are set, either by using this function or
%    manually afterwards.
%  - If the input is a FieldTrip struct with a field .trialinfo, then this
%    field is present in .sa.trialinfo. Depending on the contents of
%    .trialinfo, this could be used to specify conditions in each trial.
%    For example, if the third column of .trialinfo contains an integer
%    specifying the condition of each trial, after running this function
%    one can do
%
%       ds.sa.targets=ds.sa.trialinfo(:,3)
%
%    to set the trial conditions.
%  - Implementation note: when loading EEGLAB data from a file, using the
%    'trials' option means that data from different channels are loaded
%    through different 'load' commands. When loading a subset of all
%    trials, the advantage of this implementation is that significant
%    less memory is needed compared to an alternative implementation in
%    which the full dataset is loaded and then the trials of interest
%    are selected through slicing. The disadvantage is that loading may
%    take longer, because the file is opened and closed multiple times. yet
%    this approach allows one to load subsets of trials from data files
%    that are larger than the available RAM.
%    Such memory reductions are currently not available for FieldTrip
%    data, as FieldTrip's data structures do not store data for different
%    channels in different variables.
%
% See also: cosmo_map2meeg
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #

    % Input parsing stuff

    defaults=struct();
    defaults.targets=[];
    defaults.chunks=[];

    params = cosmo_structjoin(defaults, varargin);

    if cosmo_check_dataset(filename,'meeg',false)
        % it is already a dataset
        ds=filename;
    else
        % get image format and verify it is supported
        img_format=get_img_format(filename);
        supported_image_formats=get_supported_image_formats();

        % check externals
        cosmo_check_external(supported_image_formats.(img_format).externals);

        % get the reader
        reader=supported_image_formats.(img_format).reader;

        % read the dataset
        ds=reader(filename, params);
    end

    % set targets and chunks
    ds=set_vec_sa(ds,'targets',params.targets);
    ds=set_vec_sa(ds,'chunks',params.chunks);


    % check consistency
    cosmo_check_dataset(ds,'meeg');

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% general helper functions
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ds=set_vec_sa(ds, label, values)
    if isempty(values)
        return;
    end
    if numel(values)==1
        nsamples=size(ds.samples,1);
        values=repmat(values,nsamples,1);
    end
    ds.sa.(label)=values(:);


function img_format=get_img_format(filename)
    % helper functgion to detect image format based on filename.
    % uses 'get_supported_image_formats'.
    img_formats=get_supported_image_formats();

    fns=fieldnames(img_formats);
    for k=1:numel(fns)
        fn=fns{k};

        img_spec=img_formats.(fn);
        if img_spec.file_matcher(filename)
            img_format=fn;
            return
        end
    end
    error('Unknown image format');

function img_formats=get_supported_image_formats()
    % helper function to return the image format based on the filename
    img_formats=struct();

    % helper function to see if a filename ends with a certain string.
    % uses currying - who doesn't like curry?
    ends_with=@(ext) @(fn) ischar(fn) && ...
                        isempty(cosmo_strsplit(fn,ext,-1));
    ends_with_any=@(exts) @(fn) ischar(fn) && any(...
                            cellfun(@(x)isempty(...
                                        cosmo_strsplit(fn,x,-1)),exts));

    % eeglab txt files
    img_formats.eeglab_txt.file_matcher=ends_with('.txt');
    img_formats.eeglab_txt.reader=@read_eeglab_txt;
    img_formats.eeglab_txt.externals={};

    img_formats.eeglab.file_matcher=ends_with_any({'.daterp',...
                                                   '.icaerp',...
                                                   '.dattimef',...
                                                   '.icatimef',...
                                                   '.datitc',...
                                                   '.icaitc',...
                                                   '.datersp',...
                                                   '.icaersp'});
    img_formats.eeglab.reader=@read_eeglab;
    img_formats.eeglab.externals={};

    img_formats.eeglab_struct.file_matcher=@is_eeglab_struct;
    img_formats.eeglab_struct.reader=@convert_eeglab_struct;
    img_formats.eeglab_struct.externals={};



    % fieldtrip
    % XXX any .mat file is currently assumed to be a fieldtrip struct
    img_formats.ft.file_matcher=ends_with('.mat');
    img_formats.ft.reader=@read_ft;
    img_formats.ft.externals={};

    % fieldtrip matlab struct
    img_formats.ft_struct.file_matcher=@(x) is_ft_struct(x);
    img_formats.ft_struct.reader=@convert_ft;
    img_formats.ft_struct.externals={};


function ds=posthoc_slice_dataset_if_necessary(ds,opt)
    if ~isfield(opt,'trials')
        return;
    end

    trial_idx=opt.trials;
    verify_trial_params(size(ds.samples,1),trial_idx);
    ds=cosmo_slice(ds,trial_idx);

function verify_trial_params(nsamples,trial_idx)
    if ~isnumeric(trial_idx)
        error('.trials option must be numeric');
    end

    bad_msk=trial_idx<1 | ...
                trial_idx>nsamples | ...
                round(trial_idx)~=trial_idx;

    if any(bad_msk)
        bad_pos=find(bad_msk,1);
        error(['.trials option has illegal value at position %d; '...
                'all values must be in (1:%d)'],...
                bad_pos,nsamples);
    end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% fieldtrip helper function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ds=read_ft(filename, opt)
    % reads it from a .mat data file
    ft=importdata(filename);
    ds=convert_ft(ft, opt);


function ds=convert_ft(ft, opt)
    [data, samples_field, fdim]=get_ft_data(ft, opt);

    ds=cosmo_flatten(data, fdim.labels, fdim.values,2,...
                                'matrix_labels',get_ft_matrix_labels());
    ds.a.meeg.samples_field=samples_field;

    if is_ft_source_struct(ft)
        if isfield(ft,'inside')
            ds=apply_ft_source_inside(ds,fdim.labels,fdim.values,...
                                        ft.inside);
        end

        optional_fields={'dim','tri'};
        ds.a.meeg=copy_optional_fields(ds.a.meeg,ft,optional_fields);
    end

    ds.a.meeg=set_samples_label_explicitly_if_necessary(ds.a.meeg,ft);

    nsamples=size(ds.samples,1);
    ft_fields={'rpt','trialinfo','cumtapcnt'};
    ds.sa=copy_fields_for_matching_sample_size(ft,nsamples,ft_fields);

    ds=posthoc_slice_dataset_if_necessary(ds,opt);


function s=copy_optional_fields(s,source,optional_fields)
    for k=1:numel(optional_fields)
        key=optional_fields{k};
        if isfield(source,key)
            s.(key)=source.(key);
        end
    end

function a_meeg=set_samples_label_explicitly_if_necessary(a_meeg,ft)
    % deal with grand average data
    if ~cosmo_isfield(ft,'dimord')
        return;
    end

    first_label=cosmo_strsplit(ft.dimord,'_',1);

    if cosmo_match({first_label},{'subj'})
        a_meeg.samples_label=first_label;
    end


function [data,samples_field,fdim]=get_ft_data(ft,opt)
    [data, samples_field]=get_data_ft(ft,opt);
    [dim_labels,ft_dim_labels]=get_ft_dim_labels(ft, samples_field);

    nlabels=numel(dim_labels);
    dim_values=cell(nlabels,1);
    matrix_labels=get_ft_matrix_labels();
    for k=1:nlabels
        label=ft_dim_labels{k};
        value=ft.(label);
        if cosmo_match({label},matrix_labels)
            dim_values{k}=value';
        else
            dim_values{k}=value(:)';
        end
    end

    fdim.labels=dim_labels;
    fdim.values=dim_values;

    if is_ft_source_struct(ft)
        fdim=add_ft_mom_field_if_present(fdim, samples_field, size(data));
        [data,fdim]=fix_ft_lcmv_if_necessary(data,fdim);
    end


function [data,fdim]=fix_ft_lcmv_if_necessary(data,fdim)
    % FT LCMV does something weird for average data; the .avg.pow field
    % is 1xNCHAN for a NCHAN x NTIME field.
    % To address this:
    % - reshape the data
    % - average the time field
    dim_labels=fdim.labels;
    data_size=size(data);

    pos_pos=find(cosmo_match(dim_labels,'pos'));
    if ~isempty(pos_pos) && ... % has pos field
                pos_pos<numel(dim_labels)

        npos=size(fdim.values{pos_pos},2);
        next_dim_pos=pos_pos+1;

        if data_size(pos_pos+1)==1 && data_size(next_dim_pos+1)==npos
            new_data_size=data_size;

            % fix with next dimension
            new_data_size(pos_pos+1)=npos;
            new_data_size(next_dim_pos+1)=1;

            data=reshape(data,new_data_size);

            % the next dimension (typically time) is averaged if
            % necessary
            next_dim_value=fdim.values{next_dim_pos};
            if numel(next_dim_value)~=1
                fdim.values{next_dim_pos}=mean(next_dim_value);
            end

        end
    end



function fdim=add_ft_mom_field_if_present(fdim, samples_field, size_data)
    % insert .mom field for source struct
    samples_field_split=cosmo_strsplit(samples_field,'.');
    has_mom=numel(samples_field_split)==2 && ...
                    strcmp(samples_field_split{2},'mom');
    if has_mom
        ndim=size_data(3);
        fdim.labels=[fdim.labels(1);...
                                {'mom'};...
                                fdim.labels(2:end)];
        switch ndim
            case 1
                values={'xyz'};
            case 3
                values={'x','y','z'};
            otherwise
                error('Unsupported number of dimensions for %s: %d',...
                            samples_field,ndim);
        end
        fdim.values=[fdim.values(1);...
                                {values};...
                                fdim.values(2:end)];
    end


function labels=get_ft_matrix_labels()
    labels={'pos'};

function [cosmo_dim_labels,ft_dim_labels]=get_ft_dim_labels(ft, ...
                                                        samples_field)
    cosmo_ft_dim_labels={ 'pos','pos';...
                          'chan','label';...
                          'freq','freq';...
                          'time','time'};

    is_source=is_ft_source_struct(ft);

    if is_source==isfield(ft,'dimord')
        error('Weird fieldtrip data: .pos and .dimord are not compatible');
    end

    if is_source
        % source data
        keys=fieldnames(ft);
        labels_msk=cosmo_match(cosmo_ft_dim_labels(:,2),keys);
    else


        dimord_labels=cosmo_strsplit(ft.dimord,'_');

        sample_field=get_sample_dimord_field(ft);
        sample_msk=cosmo_match(dimord_labels, sample_field);

        dimord_labels_without_sample=dimord_labels(~sample_msk);

        labels_msk=cosmo_match(cosmo_ft_dim_labels(:,1),...
                                dimord_labels_without_sample);
    end

    ft_dim_labels=cosmo_ft_dim_labels(labels_msk,2);
    cosmo_dim_labels=cosmo_ft_dim_labels(labels_msk,1);





function sample_field=get_sample_dimord_field(ft)
    sample_field='';

    sample_dimord_fields={'rpt','trial','subj'};
    if isfield(ft,'dimord')
        ft_dimord_fields=cosmo_strsplit(ft.dimord,'_');
        msk=cosmo_match(ft_dimord_fields,sample_dimord_fields);
        if any(msk)
            assert(sum(msk)==1);
            sample_field=sample_dimord_fields{msk};
        end
    end


function sa=copy_fields_for_matching_sample_size(ft,nsamples,keys)
    n=numel(keys);

    sa=struct();
    for k=1:n
        key=keys{k};
        if isfield(ft,key)
            value=ft.(key);
            if size(value,1)==nsamples
                sa.(key)=value;
            end
        end
    end







function ds=apply_ft_source_inside(ds,dim_labels,dim_values,ft_inside)
    ndim=numel(dim_labels);
    dim_sizes=cellfun(@numel,dim_values);

    pos_idx=find(cosmo_match(dim_labels,'pos'),1);
    assert(~isempty(pos_idx),['this function should only be called '...
                                'with source datasets']);

    if isnumeric(ft_inside)
        pos=ds.a.fdim.values{pos_idx};
        [three,npos]=size(pos);
        assert(three==3);
        inside_mask=false(npos,1);
        inside_mask(ft_inside)=true;
    elseif islogical(ft_inside)
        inside_mask=ft_inside;
    else
        error('.inside must either be numeric or logical');
    end


    inside_vec_size=[1 ones(1,ndim)];
    inside_vec_size(1+pos_idx)=numel(inside_mask);
    inside_array_vec=reshape(inside_mask, inside_vec_size);

    other_dim_size=[1 dim_sizes(:)'];
    other_dim_size(1+pos_idx)=1;

    inside_array=repmat(inside_array_vec,other_dim_size);
    inside_ds=cosmo_flatten(inside_array, dim_labels, dim_values,2,...
                                         'matrix_labels',{'pos'});
    ds=cosmo_slice(ds,inside_ds.samples,2);




function [data, sample_field]=get_data_ft(ft,opt)
    if is_ft_source_struct(ft)
        [data, sample_field]=get_source_data_ft(ft, opt);
    else
        [data, sample_field]=get_sensor_data_ft(ft);
    end

function [data, sample_field]=get_source_data_ft(ft, opt)
    main_fields={'trial','avg'};
    sub_fields={'pow','mom'};

    msk_main=cosmo_match(main_fields,fieldnames(ft));
    switch sum(msk_main)
        case 0
            error('No data found in source struct');

        case 1
            % ok

        otherwise
            error('Multiple data fields found in source struct');
    end

    main_field=main_fields{msk_main};
    main_data=ft.(main_field);

    sub_field_option='data_field';
    if isfield(opt,sub_field_option)
        sub_field=opt.(sub_field_option);
    else
        msk_sub=cosmo_match(sub_fields,fieldnames(main_data));

        switch sum(msk_sub)
            case 0
                error('No data found in .%s source struct', main_field);

            case 1
                sub_field=sub_fields{msk_sub};

            otherwise
                error(['Multiple data fields found in .%s source '...
                        'struct: ''%s''. To select one of these, use '...
                        'the ''%s'' option'],...
                        main_field,...
                        cosmo_strjoin(sub_fields(msk_sub),''', '''),...
                        sub_field_option);
        end
    end

    data=extract_source_data_array_ft(main_data, sub_field);
    sample_field=sprintf('%s.%s',main_field,sub_field);

function data=extract_source_data_array_ft(main_data, sub_field)
    nsamples=numel(main_data);
    first_data=main_data(1).(sub_field);
    data_cell=cell(nsamples,1);


    switch sub_field
        case 'mom'
            npos=numel(first_data);
            data_inside_pos=find(~cellfun(@isempty,first_data));
            ninside=numel(data_inside_pos);

            [nmom, nfeatures]=size(first_data{data_inside_pos(1)});

            data_arr_empty=NaN(1,npos,nmom,nfeatures);

            for j=1:nsamples
                data_cell_cell=main_data(j).(sub_field);
                data_arr=data_arr_empty;

                for k=1:ninside
                    pos=data_inside_pos(k);
                    data_arr(1,pos,:,:)=data_cell_cell{pos};
                end

                data_cell{j}=data_arr;
            end


        case 'pow'
            feature_size=size(first_data);

            for j=1:nsamples
                data_arr=main_data(j).(sub_field);
                data_cell{j}=reshape(data_arr,[1 feature_size]);
            end

        otherwise
            error('not supported sub_field: %s', sub_field);
    end

    data=cat(1,data_cell{:});




function [data, sample_field]=get_sensor_data_ft(ft)
    % order precedence: if .trial and .avg both exist, take .trial
    sample_fields_in_order={'trial','individual',...
                            'fourierspctrm','powspctrm','avg'};
    msk=cosmo_match(sample_fields_in_order, fieldnames(ft));

    if ~any(msk)
        error(['No supported data field found in sensor data struct. '...
                'If this data struct was generated by FieldTrip, '...
                'consider contacting CoSMoMVPA''s authors.']);
    end

    i=find(msk,1);
    sample_field=sample_fields_in_order{i};

    data=ft.(sample_field);

    if isempty(get_sample_dimord_field(ft))
                        % no 'rpt_...' or 'subj_'
        data=reshape(data,[1 size(data)]);
    end



function tf=is_ft_source_struct(ft)
    tf=isstruct(ft) && isfield(ft,'pos');

function tf=is_ft_sensor_struct(ft)
    tf=isfield(ft,'dimord') && isfield(ft,'label');

function tf=is_ft_struct(ft)
    tf=is_ft_source_struct(ft) || is_ft_sensor_struct(ft);



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% eeglab struct helper function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function tf=is_eeglab_struct(s)
    tf=isstruct(s) && ...
            isfield(s,'times') && ...
            isfield(s,'datatype');


function ds=read_eeglab(fn,opt)
    if isfield(opt,'trials')
        % can have significant reduction in memory requirements
        % at the expense of loading different parts of the same file
        % multiple times.
        s=eeglab_load_with_trials(fn,opt.trials);
        opt=rmfield(opt,'trials');
    else
        s=load(fn,'-mat');
    end
    ds=convert_eeglab_struct(s,opt);

function s=eeglab_load_with_trials(fn,trial_idx)
    % can save memory by loaded each channel seperately
    s_minimal=load(fn,'-mat','datatype');
    has_freq=helper_eeglab_get_freq_info(s_minimal);

    w=whos('-file',fn);
    s_surrogate=struct();
    for k=1:numel(w)
        s_surrogate.(w(k).name)=[];
    end

    [chan_prefix,chan_suffix]=eeglab_get_chan_pre_suffix(s_surrogate);
    [chan_labels]=eeglab_get_chan_labels(s_surrogate,...
                                    chan_prefix,chan_suffix);
    other_labels=setdiff({w.name},chan_labels);
    assert(numel(other_labels)<numel(w));

    s=load(fn,'-mat',other_labels{:});
    n_chan=numel(chan_labels);
    for k=1:n_chan
        label=chan_labels{k};
        data_struct=load(fn,'-mat',label);
        data=data_struct.(label);

        sliced_data=helper_eeglab_slice_chan(data,trial_idx,has_freq);
        s.(label)=sliced_data;
    end


function result=helper_eeglab_slice_chan(data, trial_idx, has_freq)
    assert(islogical(has_freq));

    if has_freq
        trial_dim=3;
    else
        trial_dim=1;
    end

    % ensure enough trials
    n_trials=size(data,trial_dim);
    verify_trial_params(n_trials,trial_idx);

    if has_freq
        result=data(:,:,trial_idx);
    else
        assert(numel(size(data))<=2);
        result=data(trial_idx,:);
    end

function [has_freq,freq_label,freq_values]=helper_eeglab_get_freq_info(s)
    samples_type=eeglab_get_samples_type(s);
    has_freq=strcmp(samples_type,'timefreq');
    if nargout<=1
        return;
    end

    if has_freq
        freq_label={'freq'};
        freq_values={s.freqs};
    else
        freq_label={};
        freq_values={};
    end



function ds=convert_eeglab_struct(s, opt)
    samples_type=eeglab_get_samples_type(s);
    [has_freq,freq_label,freq_values]=helper_eeglab_get_freq_info(s);
    freq_size=cellfun(@numel,freq_values);

    [chan_prefix,chan_suffix]=eeglab_get_chan_pre_suffix(s, opt);

    is_baseline=~isempty(regexp(chan_suffix,'base$','once'));
    if is_baseline
        % because the baseline is computed over time
        time_size=[];
        time_values={};
        time_label={};
    else
        time_values={s.times};
        time_label={'time'};
        time_size=numel(time_values{1});
    end

    % set channels
    [eeglab_labels,chan_values]=eeglab_get_chan_labels(...
                                        s,chan_prefix,chan_suffix);
    n_chan=numel(chan_values);

    if isfield(s,'chanlabels')
        assert(numel(s.chanlabels)==n_chan);
    end



    data_cell=cell(n_chan,1);
    for k=1:n_chan
        % load data for each channel
        key=eeglab_labels{k};
        value=s.(key);

        if has_freq
            % it seems that for freq data, single trial data is the last
            % dimension, whereas for erp data, single trial data is the first
            % dimension. In any case we want to make single trial data the
            % first dimension
            has_trial_dim=size(value,3)~=1;
            if has_trial_dim
                value=shiftdim(value,2);
            else
                value=shiftdim(value,-1); % insert singleton dimension
            end
        end

        n_samples=size(value,1);
        size_chan_singleton=[n_samples,1,[freq_size],time_size];

        value_rs=reshape(value,size_chan_singleton);
        data_cell{k}=value_rs;
    end

    meeg_parameters=s.parameters;
    clear s;

    data=cat(2,data_cell{:});
    clear data_cell;

    ds=cosmo_flatten(data,[{chan_prefix},freq_label,time_label],...
                          [{chan_values},freq_values,time_values]);
    clear data;

    ds.sa=struct();
    ds.a.meeg.samples_field='trial';
    ds.a.meeg.samples_type=samples_type;
    ds.a.meeg.samples_label='rpt';

    ds.a.meeg.parameters=meeg_parameters;

    ds=posthoc_slice_dataset_if_necessary(ds,opt);


function [chan_prefix,chan_suffix]=eeglab_get_chan_pre_suffix(s,opt)
    keys=fieldnames(s);

    numeric_infix='1';
    pat=['^(\D+)' numeric_infix '([\D_]*$)'];
    matches=regexp(keys,pat,'once','tokens');

    match_idx=find(~cellfun(@isempty,matches));

    if numel(match_idx)==1
        unique_match=matches{match_idx};
    else
        unique_match=eeglab_select_idx_chan_pref_suffix(matches,opt);
    end

    chan_prefix=unique_match{1};
    chan_suffix=unique_match{2};


function match=eeglab_select_idx_chan_pref_suffix(all_matches,opt)
% typical use case is data with *_erspbase and _ersp data
    key='data_field';

    % 'erspboot' are bootstrap estimates - no idea how to deal with these
    % so for now they are not supported
    not_supported_values={'erspboot'};

    % get fieldnames to keep
    get_match_name=@(x)x{2}(2:end);
    match_func=@(x)~isempty(x) && ...
                    ~cosmo_match({get_match_name(x)},not_supported_values);
    match_msk=cellfun(match_func,all_matches);
    matches=all_matches(match_msk);

    valid_values=cellfun(get_match_name,matches,...
                           'UniformOutput',false);

    suffix=sprintf('Valid options for the ''%s'' option are ''%s''.',...
                         key,cosmo_strjoin(valid_values,''', '''));

    % try to be helpful
    is_ersp=all(cosmo_match({'erspbase';'ersp'}, valid_values));
    if is_ersp
        suffix=sprintf(['%s\nThis data looks like an EEGLAB '...
                    'ERSP data structure. Note '...
                    'that the ''ersp'' option returns a '...
                    'raw dataset (presumably without baseline, ',...
                    'correction), whereas ''erspbase'' returns the '...
                    ' baseline '...
                    'values themselves (that can be used for baseline '...
                    'correction). It is currently not possible to '...
                    'return baseline corrected data with this '...
                    'function.'],suffix);
    end

    if ~isfield(opt,key)
        error('The ''%s'' option is required. %s', key, suffix);
    end

    value=opt.(key);
    if ~ischar(value)
        error('The ''%s'' option must be a string. %s',key, suffix);
    end

    idx=find(cosmo_match(valid_values,value));

    if isempty(idx)
        error(suffix);
    end
    match=matches{idx};



function [eeglab_labels,cosmo_labels]=eeglab_get_chan_labels(...
                                s,chan_prefix,chan_suffix)

    make_eeglab_label=@(idx)sprintf('%s%d%s',chan_prefix,idx,chan_suffix);
    % see how many labels there are
    count=0;
    while true
        label=make_eeglab_label(count+1);
        if ~isfield(s,label)
            break
        end
        count=count+1;
    end

    idxs=1:count;

    eeglab_labels=arrayfun(make_eeglab_label,idxs,'UniformOutput',false);
    if strcmp(chan_prefix,'comp')
        assert(~isfield(s,'chanlabels'));
        make_comp_label=@(idx)sprintf('comp%d',idx);
        cosmo_labels=arrayfun(make_comp_label,idxs,'UniformOutput',false);
    else
        cosmo_labels=s.chanlabels;
    end






function samples_type=eeglab_get_samples_type(s)
    mapping={'erp','timelock';...
            'timef','timefreq';...
            'itc','timefreq';...
            'ersp','timefreq';...
            'erspbase','baseline'};
    for k=1:size(mapping,1)
        row=mapping(k,:);
        if strcmp(lower(s.datatype),row{1})
            samples_type=row{2};
            return;
        end
    end

    error('unsupported datatype mapping ''%s''',s.datatype);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% eeglab txt helper function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function ds=read_eeglab_txt(fn, opt)
    % reads eeglab time series data. returns data in fieldtrip-like format
    fid=fopen(fn);

    header_line=fgetl(fid); % read header
    chan_labels=cosmo_strsplit(header_line,'\t');
    chan_labels=chan_labels(2:(end-1)); % omit first & last bogus element

    nchan=numel(chan_labels);
    data_pat=cosmo_strjoin(repmat({'%n'},1,nchan+1),' ');

    % read data from file
    cell_data=textscan(fid,data_pat);

    % check all data was read
    neg_one=fgetl(fid);
    if neg_one~=-1
        error('Could not read all data from %s', fn);
    end

    %%%%%%%%%%%%%%%
    % data consistency checks

    % timepoints are in the first column, data in other columns
    timepoints=cell_data{1};
    nrows=numel(timepoints);

    [unused,t_trial]=cosmo_index_unique(timepoints);
    ntime=numel(t_trial);

    if mod(nrows,ntime)~=0 || ...
                ~all(all(bsxfun(@eq,reshape(timepoints,ntime,[]),...
                                  t_trial)))
        error('Data not contiguous or unexpected order of time points');
    end

    % number of trials
    ntrial=nrows/ntime;


    %%%%%%%%%%%%%%%
    % put the data in 3D array
    data=zeros(ntrial,nchan,ntime);
    for chan=1:nchan
        chan_data=cell_data{chan+1}; % skip first column as it has timepoints
        data(:,chan,:)=reshape(chan_data,ntime,ntrial)';
    end

    %%%%%%%%%%%%%%%
    % flatten and make it a dataset
    % (convert miliseconds to seconds along the way)
    dim_labels={'chan';'time'};
    dim_values={chan_labels(:)';.001*t_trial(:)'};

    % make a dataset
    ds=cosmo_flatten(data, dim_labels, dim_values);

    % set datatype to timelock-ish in fieldtrip-compatible way
    ds.a.meeg.samples_field='trial';
    ds.a.meeg.samples_type='timelock';
    ds.a.meeg.samples_label='rpt';

    % set sample info
    ds.sa.(ds.a.meeg.samples_label)=(1:ntrial)';

    ds=posthoc_slice_dataset_if_necessary(ds,opt);