cosmo statcode skl

function stat_repr=cosmo_statcode(ds, output_format)
% Convert statcode for different analysis packages
%
% stat_repr=cosmo_statcode(ds[, output_format])
%
% Inputs:
%   ds             dataset struct with N samples, or Nx1 cell with strings
%                  with statistic labels, or AFNI, BV or NIFTI header
%                  struct for N samples.
%   output_format  Optional; one of 'afni', 'bv', or 'nifti'; or empty.
%
% Returns:
%   stat_repr      - If output_format is empty or omitted: an Nx1 cell
%                    with a string representation of the statistic in each
%                    sample, e.g. 'Ftest(123,2)' or 'Zscore()' or empty.
%                  - If output_format=='afni': struct with field
%                    .BRICK_STATAUX.
%                  - If output_format=='bv': Nx1 cell with structs, each
%                    with fieldnames .name, .DF1 and .DF2.
%                  - If output_format=='nifti': struct with fieldnames
%                    .intent_code and .intent_p{1,2,3} if all stat codes
%                    are the same; empty otherwise (NIFTI does not support
%                    different stat codes for different samples).
%
% Notes:
%   - this function is intended for fmri datasets
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #

    if nargin<2, output_format=''; end

    % see what kind of input we got
    if iscellstr(ds)
        % string representation of stats
        stat_repr=ds(:);
    elseif isstruct(ds) && isfield(ds,'samples')
        % dataset struct
        if isfield(ds,'sa') && isfield(ds.sa,'stats')
            % get stat information
            stat_repr=ds.sa.stats;
        else
            % no stat information; return empty output
            stat_repr=[];
        end
    else
        % assume header from AFNI, BV or NIFTI; try to convert it to string
        % representation, or fail in hdr2strs if that's not the case
        stat_repr=hdr2strs(ds);
    end

    % convert to (name, df) pairs
    name_df=stat_strs2name_df(stat_repr);

    if isempty(output_format)
        % we're done
        return
    end

    % convert to package-specific header
    stat_repr=name_df2hdr(name_df, output_format);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% helper functions
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function name_df=stat_strs2name_df(stat_strs)
% converts string representations to name and degrees of freedom
    match_regexp_failed=@(res) isempty(res) || isempty(res.name);

    n=numel(stat_strs);

    name_df=cell(n,2);
    for k=1:n
        stat_str=stat_strs{k};
        if isempty(stat_str)
            name_df{k,1}='';
        else
            % try with parentheses: 'stat_name(df1,df2,df3)'
            names=regexp(stat_str,'^(?<name>\w+)\((?<df>[,\d]*)\)$',...
                                                        'names');
            if match_regexp_failed(names)
                % try without parentheses: 'stat_name'
                names=regexp(stat_str,'^(?<name>\w+)$','names');
                df=[];
            else
                df_split=cosmo_strsplit(names.df,',');
                if isequal(df_split,{''})
                    df=[];
                else
                    df=cellfun(@str2num,df_split);
                end
            end

            if match_regexp_failed(names)
                error('Unable to parse stat string %s', stat_str);
            end

            name_df{k,1}=names.name;
            name_df{k,2}=df;
        end
    end


function hdr=name_df2hdr(name_df, output_format)
% converts statistic name and df to package-specific header
%
% hdr=name_df2hdr(name_df, output_format)
%
% Inputs:
%   name_df:        Kx2 cell with name and degrees of freedom, for K
%                   samples
%   output_format   'afni', 'bv', or 'nifti'
%
% Returns:
%   hdr             Part of the package-specific header containing the
%                   statistical information
    codes=get_codes(output_format);
    nsamples=size(name_df,1);

    stat_idxs=zeros(nsamples,1);
    names=codes(:,1);
    for k=1:nsamples
        name=name_df{k,1};
        if isempty(name)
            continue;
        end
        idx=find(cosmo_match(names, name));
        if numel(idx)==1
            % ignore if not found or multiple matches
            stat_idxs(k)=idx;
        elseif ~strcmp(name,'none')
            cosmo_warning('Unrecognized stat name %s at sample %d',name,k);
        end
    end

    switch output_format
        case 'afni'
            hdr=struct();

            % space for each sample
            auxs=cell(1,nsamples);

            % build BRICK_STATAUX struct
            for k=1:nsamples
                stat_idx=stat_idxs(k);
                if stat_idx==0
                    continue;
                end
                df=name_df{k,2};
                auxs{k}=[(k-1) stat_idx numel(df) df];
            end

            % concatenate information for all samples
            hdr.BRICK_STATAUX=[auxs{:}];

        case 'bv'
            maps=cell(nsamples,1);
            for k=1:nsamples
                map=struct();
                stat_idx=stat_idxs(k);
                map.Type=stat_idx;

                if stat_idx~=0
                    df=name_df{k,2};

                    if numel(df)>=1, map.DF1=df(1); end
                    if numel(df)>=2, map.DF2=df(2); end
                end

                maps{k}=map;
            end

            hdr=maps; % return a cell with maps

        case 'nifti'
            hdr=struct();

            if isempty(stat_idxs)
                return
            end

            unq_stat_idx=unique(stat_idxs);
            df=name_df(:,2);
            n=numel(df);
            ndf=cellfun(@numel,df);
            max_ndf=max(ndf);
            df_matrix=zeros(n, max_ndf);


            for k=1:n
                dfk=df{k};
                n_dfk=numel(dfk);
                if n_dfk>0
                    df_matrix(k,1:n_dfk)=dfk;
                end
            end

            unq_df=unique(df_matrix,'rows');

            if numel(unq_stat_idx)>1 || numel(unique(ndf))>1 || ...
                            size(unq_df,1)>1
                cosmo_warning(['Multiple stat codes found, unsupported '...
                                    'by nifti']);
                return
            end

            hdr.intent_code=unq_stat_idx;

            df=zeros(1,3);
            if ~isempty(unq_df)
                df(1:numel(unq_df))=unq_df;
            end
            hdr.intent_p1=df(1);
            hdr.intent_p2=df(2);
            hdr.intent_p3=df(3);

        otherwise
            assert(false,'should never come here');
    end


function stat_strs=hdr2strs(hdr)
% Converts package-specific header to stat string representation
%
% stat_strs=hdr2strs(hdr)
%
% Inputs:
%   hdr         AFNI, BV vmp, or NIFTI header
%
% Returns:
%   stat_strs   Kx1

    if isfield(hdr, 'Map') && (isfield(hdr,'VMRDimX') ||...
                                isfield(hdr,'NrOfVertices'))
        % bv vmp or smp
        codes=get_codes('bv');
        nsamples=numel(hdr.Map);
        stat_strs=cell(nsamples,1);

        for k=1:nsamples
            map=hdr.Map(k);
            tp=map.Type;
            df=[map.DF1 map.DF2];
            stat_strs{k}=stat_code2str(codes, tp, df);
        end

    elseif isstruct(hdr) && isfield(hdr,'DATASET_RANK')
        % afni
        nsamples=hdr.DATASET_RANK(2);
        stat_strs=repmat({''},nsamples,1);

        if isfield(hdr,'BRICK_STATAUX')
            codes=get_codes('afni');
            aux=hdr.BRICK_STATAUX;
            naux=numel(aux);
            pos=0;
            while (pos+2)<naux
                pos=pos+1;
                brik_idx=aux(pos)+1; % base0 => base1
                stat_code=aux(pos+1); % stat code
                n_df=aux(pos+2); % # of df
                if naux<pos+2+n_df
                    break;
                end
                df=aux(pos+2+(1:n_df)); % get dfs
                stat_strs{brik_idx}=stat_code2str(codes, stat_code, df);
                pos=pos+2+n_df;
            end

            % sanity check
            if pos~=naux
                error('Not all elements were processed in STATAUX %s',...
                            sprintf('%d ', aux));
            end
        end

    elseif isstruct(hdr) && isfield(hdr,'dime') && isfield(hdr.dime,'dim')
        % nifti
        codes=get_codes('nifti');

        dime=hdr.dime;
        % deal with potential data in >4th dimension
        % (AFNI-NIFTI conversion syndrome)
        nsamples=max(prod(dime.dim(5:end)),dime.dim(5));

        % get stat codes
        stat_code=dime.intent_code;
        stat_df=[dime.intent_p1 dime.intent_p2 dime.intent_p3];
        stat_str=stat_code2str(codes,stat_code,stat_df);

        % repeat for as many samples as there are in the dataset
        stat_strs=repmat({stat_str},nsamples,1);
    else
        error('Unsupported input');
    end


function str=stat_code2str(codes, stat_code, df)
% give string representation of stat code with degrees of freedom

% (df can have more elements than required; superfluous ones are ignored.)
    if stat_code==0
        str='';
        return
    end

    name=codes{stat_code,1};

    if isempty(name)
        error('Illegal stat_code: %d', stat_code);
    end

    % build string representation of df
    df_count=codes{stat_code,2};
    df_str=cellfun(@(x) sprintf('%d',x),num2cell(df(1:df_count)),...
                        'UniformOutput',false);

    % join with stat name
    str=sprintf('%s(%s)',name,cosmo_strjoin(df_str,','));



function codes=get_codes(package)
% get names and number of degrees of freedom for an analysis package
%
% codes=get_codes(package)
%
% Inputs:
%   package      'nifti', 'afni' or 'bv'.
%
% Returns:
%   codes        Px2 cell, with codes{k,1} the name of the k-th stat
%                and codes{k,2} the number of degrees of freedom


    switch package
        case 'nifti'
            % http://nifti.nimh.nih.gov/nifti-1/documentation/...
            %                 nifti_stats.pdf
            codes={'',0;... % not defined
                    'Correl',1;... % first one starts at 2
                    'Ttest',1;...
                    'Ftest',2;...
                    'Zscore',0;...
                    'Chisq',1;...
                    'Beta',2;...
                    'Binom',2;...
                    'Gamma',2;...
                    'Poisson',1;...
                    'Normal',2,;...
                    'Ftest_Nonc',3;...
                    'Chisq_Nonc',2;...
                    'Logistic',2;...
                    'Laplace',2;...
                    'Uniform',2;...
                    'Ttest_Nonc',2;...
                    'Weibull',3;...
                    'Chi',1;...
                    'Invgauss',2;...
                    'Extval',2;...
                    'Pval',0;...
                    'Logpval',0;...
                    'Log10pval',0};


        case 'afni'
            % http://afni.nimh.nih.gov/pub/dist/doc/program_help/...
            %                README.attributes.html
            % AFNI uses a subset of NIFTI.
            nifti_codes=get_codes('nifti'); % recursive call
            codes=nifti_codes(1:10,:);


        case 'bv'
            % http://support.brainvoyager.com/documents/Available_Tools/...
            %              Available_Plugins/niftiplugin_manual_v12.pdf
            % why follow a standard (like NIFTI) if one can
            % come up with something incompatible too?
            codes={'Ttest',1;...      % 1
                      'Correl',1;...  % 2  correlation
                      'Correl',1;...  % 3 "cross-correlation"
                      'Ftest',2;...   % 4
                      'Zscore',0;...  % 5
                      '',0;...        % 6 [lots of undefined codes here]
                      '',0;...        % 7
                      '',0;...        % 8
                      '',0;...        % 9
                      '',0;...        % 10
                      '',0;...        % 11 "percent signal change" ignored
                      '',0;...        % 12 "ICA" ignored
                      '',0;...        % 13
                      'Chisq',1;...   % 14
                      'Beta',2;...    % 15
                      'Pval',0};      % 16
        otherwise
            error('Unsupported analysis package %s', package);
    end