cosmo surficial neighborhood skl

function [nbrhood,vo,fo,out2in]=cosmo_surficial_neighborhood(ds, surfs, varargin)
% neighborhood definition for surface-based searchlight
%
% [nbrhood,vo,fo,out2in]=cosmo_surficial_neighborhood(ds, surfs, ...)
%
% Inputs:
%    ds            fMRI-like volumetric or surface-based dataset
%    surfs         cell with specification of surfaces. The following
%                  formats are valid options:
%                  # Volumetric datasets:
%                    * Single surface (Caret, BrainVoyager)
%                      - {fn, [start,stop]}
%                      - {fn, [start,stop], niter}
%                      - {v,f,[start,stop], niter}
%                      where
%                         + fn is the filename of a single surface
%                         + start and stop are distances towards and away
%                           from the center of a hemisphere. For example,
%                           if surface coordinates are in mm (which is
%                           typical), start=-3 and stop=2 means selecting
%                           voxels that are 3 mm or closer to the surface
%                           on the white-matter side, up to voxels that are
%                           2 mm from the surface on the pial matter side
%                         + niter is the number of subsampling (mesh
%                           decimation) iterations to use for the output
%                           surface.
%                         + v are Px3 coordinates for P nodes
%                         + f are Qx3 node indices for Q faces
%                    * Twin surface (FreeSurfer)
%                      - {fn1, fn2}
%                      - {fn1, fn2,         fn_o}
%                      - {fn1, fn2,         niter}
%                      - {v1,v2,f}
%                      - {v1,v2,f,v_o,f_o}
%                      - {v1,v2,f,niter}
%                      where
%                         + fn{1,2} are filenames for pial and white
%                           surfaces
%                         + fn_o is the filename of an intermediate
%                           (node-wise average of a pial and white
%                           surface) output surface, and can be of a lower
%                           resolution than the pial and white surfaces.
%                           This  can be achieved by using MapIcosahedron
%                           to generate meshes of different densities, e.g.
%                           ld=64 for the pial and white surface and ld=16
%                           for the intermediate surface. It is required
%                           that for every node in the intermediate surface
%                           there is a corresponding node (at the same
%                           spatial location) on the node-wise average of
%                           the pial and white surface.
%                         + v{1,2} are Px3 coordinates for P nodes of the
%                           pial and white surfaces
%                         + f are Qx3 node indices for Q faces on the pial
%                           and white surfaces
%                         + v_o and f_o are the nodes and vertices of an
%                           intermediate output surface. It is required
%                           that the vertices in v_o are a subset of the
%                           pair-wise averages of vertices in v1 and v2. A
%                           typical use case is using standard topologies
%                           from AFNI's MapIcosahedron, where the v1 and v2
%                           are high-res with X linear divisions, v_o is
%                           low-res with Y linear divisions, and Y<X and X
%                           is an integer multiple of Y.
%                         + niter is the number of subsampling (mesh
%                           decimation) iterations to use for the output
%                           surface.
%                  # Surface-based datasets:
%                    - {fn}
%                    - {fn, niter}
%                    - {v, f, niter}
%                        select neighbors:
%    'radius', r         } either within radius r (usually in mm), grow
%    'count', c          } the radius to get the nearest c neighbors,
%    'area', a           } grow the radius to get neighbors whose area size
%                        } is less than or equal area a (usually in mm^2)
%    'direct', true      } or get direct neighbors only (nodes who share an
%                        } edge)
%                        These four options are mutually exclusive
%    'metric',metric     distance metric along cortical surface. One of
%                        'geodesic' (default), 'dijkstra' or 'euclidean'.
%    'line_def',line_def definition of lines from inner to outer surface
%                        See surfing_nodeidxs2coords
%    'progress', p       Show progress every p centers (default: 100)
%    'vol_def', v        Volume definition with fields .mat (4x4 affine
%                        voxel-to-world matrix) and .dim (1x3 number of
%                        voxels in each dimension). If omittied, voldef is
%                        based on ds
%    'subsample_min_ratio', r   When niter is provided and positive, it
%                               defines the minimum ratio between surfaces
%                               of old and new faces. This should help in
%                               preventing the generation of triangles with
%                               angles close to 180 degrees (default: 0.2).
%    'center_ids', c     Center ids in output surface of the nodes for
%                        which the neighborhood should be computed. Empty
%                        means all nodes are used (default: [])
%
% Outputs:
%     nbrhood       neighborhood struct with fields
%                   .neighbors       Mx1 cell with .neighbors{k} the
%                                    indices of features (relative to ds)
%                                    in the neighborhood of node k
%                   .a.fdim.labels   set to {'node_indices'}
%                   .a.fdim.values   set to {center_ids} with center_ids
%                                    Mx1 ids of each neighborhood
%                   .fa.node_indices identical to center_ids
%                   .fa.radius       } if ds is a surface-based dataset,
%                   .fa.area         } these fields are present and contain
%                                    } the radius and total surface area of
%                                      the selected nodes
%     v_o           NVx3 coordinates for N nodes of the output surface
%     f_o           NFx3 node indices for Q faces of the output surface
%     out2in        Node mapping from output to input surface.
%
% Example:
%     % this example uses BrainVoyager Surfaces
%     anat_fn='SUB02_MPRAGE_ISO_IIHC_TAL.vmr';
%     surf_fn='SUB02_MPRAGE_ISO_IIHC_TAL_LH_RECOSM.srf';
%
%     % read dataset and surface
%     ds=cosmo_fmri_dataset(anat_fn,'mask',anat_fn);
%     fprintf('Dataset has %d samples and %d features\n',size(ds.samples));
%
%     [v,f]=surfing_read(surf_fn);
%     fprintf('Input surface has %d nodes and %d faces\n',size(v,1),size(f,1))
%
%     % define neighborhood parameters
%     count=100; % 100 voxels per searchlight
%     niter=10;    % 10 mesh decimation algorithms
%
%     % surface definition; voxels are selected that are 3 mm or closer to the
%     % surface on the white-matter side, up to voxels that are 1 mm from the
%     % surface on the pial matter side.
%
%     % Note: for FreeSurfer surfaces one could use
%     %       {surf_white_fn, surf_pial_fn [,niter]}
%     surfs={surf_fn,[-3 1],niter};
%     [nbrhood,vo,fo]=cosmo_surficial_neighborhood(ds,surfs,'radius',radius);
%     fprintf('Neighborhood has %d elements\n', numel(nbrhood.neighbors))
%     fprintf('Output surface has %d nodes and %d faces\n',size(vo,1),size(fo,1))
%
%     % write decimated (smaller) output surface in ASCII format
%     surfing_write(['small_surf.asc'],vo,fo);
%
%     % define a measure that counts the number of features in each searchlight
%     % (in more practical applications the measure could be, for example,
%     %  cosmo_correlation_measure or cosmo_crossvalidation_measure)
%     voxel_counter=@(x,opt) cosmo_structjoin('samples',size(x.samples,2));
%
%     % run a searchlight
%     res=cosmo_searchlight(ds,voxel_counter,'nbrhood',nbrhood);
%     fprintf('Searchlights have mean %.3f +/- %.3f features\n',...
%                     mean(res.samples), std(res.samples));
%
%
%     % store surface dataset (in AFNI/SUMA NIML format) with the number of
%     % features (voxels) for each center node of the output surface
%     s=struct();
%     s.node_indices=res.a.fdim.values{1}(res.fa.node_indices);
%     s.data=res.samples';
%     surfing_write('voxcount.niml.dset',s);
%
%     % store volume dataset (in NIFTI format) with the number of times each
%     % voxel was selected by a searchlight
%     vol=ds;
%     vol.samples(:)=0;
%     for k=1:numel(nbrhood.neighbors);
%         nbrs=nbrhood.neighbors{k};
%         vol.samples(nbrs)=vol.samples(nbrs)+1;
%     end
%     cosmo_map2fmri(vol,'voxcount.nii');
%     cosmo_map2fmri(ds,'anat.nii');
%
% Notes
%  - Higher values of niter, or using a less dense mesh for fn_o or
%    (v_o, f_o) means that the output surface has fewer nodes, which will
%    decrease execution time for searchlights.
%  - This function requires the surfing toolbox, surfing.sourceforge.net or
%    github.com/nno/surfing
%
% See also: surfing, surfing_nodeidxs2coords, cosmo_searchlight
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #

cosmo_check_external('surfing');

check_input(surfs, varargin{:});

defaults=struct();
defaults.metric='geodesic';
defaults.line_def=[10 0 1];
defaults.progress=100;
defaults.vol_def=[];
defaults.subsample_min_ratio=.2;
defaults.center_ids=[];
opt=cosmo_structjoin(defaults,varargin{:});

ds_type=get_ds_type(ds);

if strcmp(opt.metric,'geodesic') && ~isfield(opt,'direct')
    cosmo_check_external('fast_marching');
end

% get surfaces
[v1,v2,f,vo,fo]=parse_surfs(surfs, ds_type);
ds_is_surface=strcmp(ds_type, 'surface');

one_surface=numel(v2)==2 || (ds_is_surface && isempty(v2));

% define intermediate high-res surface
if one_surface
    vi=v1;
else
    vi=(v1+v2)*.5;
end

switch numel(vo)
    case 0
        % no low-res surface, so use intermediate surface as source
        vo=vi;
        fo=f;
    case 1
        % subsample the surface
        [vo,fo]=surfing_subsample_surface(vi,f,vo,...
                                opt.subsample_min_ratio, opt.progress);
end

% find mapping from low to high res surface
% if they are identical then low2high is the identity
out2in=surfing_maplow2hires(vo', vi');



% set center ids
center_ids=opt.center_ids(:)'; % ensure row vector
if isempty(opt.center_ids)
    center_ids=1:size(vo,1);
else
    out2in=out2in(center_ids);
end

% either surface or volumetric input dataset
ds_is_surface=cosmo_check_dataset(ds,'surface',false);
ds_is_volume=cosmo_check_dataset(ds,'fmri',false);

if sum([ds_is_surface, ds_is_volume])~=1
    error('Unsupported dataset: expected surface or fmri dataset');
end

if ds_is_surface
    % todo: use out2in to allow for subset of node indices as center
    %       (the current implementation computes neighborhoods for all
    %       nodes)
    if ~isequal(center_ids,1:size(vo,1))
        error('Unsupported center_ids option');
    end

    if ~isequal(out2in',1:numel(out2in))
        %error('Unsupported other output surface than input surface');
    end

    nbrhood=surface_to_surface_neighborhood(ds,vi,f,opt);


elseif ds_is_volume
    % get circle definition
    circle_def=get_circle_def(opt);

    % set volume definition
    if isempty(opt.vol_def)
        vol_def=ds.a.vol;
    else
        vol_def=opt.vol_def;
    end



    % define voxel mask based on features in dataset
    ds_one=cosmo_slice(ds,1);          % arbitrary sample
    ds_one.samples(:)=1;               % all set to one
    ds_unflat=cosmo_unflatten(ds_one); % missing features have value of zero
    vol_def.mask=ds_unflat~=0;          % define mask


    n2v=surfing_voxelselection(v1',v2',f',circle_def,vol_def,...
                                             out2in,opt.line_def,...
                                             opt.metric,0+opt.progress);

    ncenters=numel(center_ids);
    assert(ncenters==numel(n2v));
    % determine unique center ids used
    [unq_center_ids,unused,center_ids_mapping]=unique(center_ids);

    % find mapping from features
    nf_full=numel(vol_def.mask);
    nf_mask=sum(vol_def.mask(:));
    all2msk_indices=zeros(1,nf_full);
    all2msk_indices(vol_def.mask)=1:nf_mask;

    % store neighborhood information
    nbrhood=struct();
    nbrhood.a.fdim.labels={'node_indices'};
    nbrhood.a.fdim.values={unq_center_ids};
    nbrhood.fa.node_indices=center_ids_mapping(:)';

    % set neighbors while considering the masked nature of the input dataset
    neighbors=cell(ncenters,1);
    for k=1:ncenters
        msk_indices=all2msk_indices(double(n2v{k}));
        assert(all(msk_indices>0));
        neighbors{k}=msk_indices;
    end
    nbrhood.neighbors=neighbors;

else
    assert(false,'this should never happen');
end

% compatibility wrapper for the surfing toolbox
nbrhood=ensure_neighbors_row_vectors(nbrhood);

origin=struct();
origin.a=ds.a;
origin.fa=ds.fa;
nbrhood.origin=origin;


cosmo_check_neighborhood(nbrhood,ds);


function nbrhood=surface_to_surface_neighborhood(ds,vertices,faces,opt)
    circle_def=get_circle_def(opt);
    dim_label='node_indices';

    [two,fdim_index,attr_name,dim_name]=cosmo_dim_find(ds,...
                                        dim_label,true);

    fdim_nodes=ds.a.(dim_name).values{fdim_index};
    fa_indices=ds.(attr_name).(dim_label);

    if ~isequal(sort(fdim_nodes), unique(fdim_nodes))
        error('values in .a.%s.values{%d} are not all unique',...
                    dim_name, fdim_index);
    end



    % - unq_nodes contains the node index associated with each unique
    %   node_indices feature in the dataset
    [fa_idxs,unq_fa_indices]=cosmo_index_unique(fa_indices');
    unq_nodes=fdim_nodes(unq_fa_indices);

    nvertices=size(vertices,1);

    too_large_index=find(unq_nodes>nvertices,1);
    if any(too_large_index)
        error(['surface has %d vertices, but maximum .fa.%s '...
                    'is %d'],...
                    nvertices,dim_label,fa_node_ids(too_large_index));
    end

    ignore_vertices_msk=true(nvertices,1);
    ignore_vertices_msk(unq_nodes)=false;

    vertices(ignore_vertices_msk,:)=NaN;

    % set mapping from nodes to feature ids
    ncenters=numel(unq_fa_indices);
    node2feature_ids=cell(1,nvertices);
    for k=1:ncenters
        node=unq_nodes(k);
        if ignore_vertices_msk(node)
            feature_ids=cell(1,0);
        else
            feature_ids=fa_idxs{k};
        end
        node2feature_ids{node}=feature_ids;
    end


    % run node selection
    [n2ns,radii]=surfing_nodeselection(vertices',faces',circle_def,...
                                        opt.metric,opt.progress);

    % set output
    nbrhood=struct();
    nbrhood.a.fdim.labels={'node_indices'};
    nbrhood.a.fdim.values={fdim_nodes};


    nbrhood.neighbors=cell(ncenters,1);
    nbrhood.fa.radius=zeros(1,ncenters);
    nbrhood.fa.node_indices=zeros(1,ncenters);

    % add area fieldname
    node2area=surfing_surfacearea(vertices,faces);
    nbrhood.fa.area=zeros(1,ncenters);

    for k=1:ncenters
        center_node=unq_nodes(k);
        nbrhood.fa.node_indices(k)=unq_fa_indices(k);
        nbrhood.fa.radius(k)=radii(center_node);
        nbrhood.fa.area(k)=sum(node2area(n2ns{center_node}));

        if ignore_vertices_msk(center_node)
            around_feature_ids=zeros(1,0);
        else
            around_nodes=n2ns{center_node};
            around_feature_ids=cat(1,node2feature_ids{around_nodes});
        end

        nbrhood.neighbors{k}=around_feature_ids(:)';
    end




function nbrhood=ensure_neighbors_row_vectors(nbrhood)
    % compatibility wrapper for the surfing toolbox
    for k=1:numel(nbrhood.neighbors)
        if size(nbrhood.neighbors{k},1)~=1
            nbrhood.neighbors{k}=nbrhood.neighbors{k}';
        end
    end

function [v1,v2,f,vo,fo]=parse_surfs(surfs, ds_type)
    % helper function to get surfaces

    ds_is_surface=strcmp(ds_type,'surface');

    [v1,v2,f,vo,fo]=parse_surfs_arguments(ds_is_surface, surfs);

    if numel(v2)~=2 && (numel(f)==2 || isempty(f))
        % swap position
        temp=f;
        f=v2;
        v2=temp;
    end

    check_surf_arguments(ds_is_surface,v1,v2,f,vo,fo);


function check_surf_arguments(ds_is_surface,v1,v2,f,vo,fo)


    if isempty(v1) || (isempty(v2) && ~ds_is_surface) || isempty(f)
        error('Not enough arguments for surfaces and topology');
    end

    % c2 can be vector with two elements indicating the size of the curved
    % cylinder with a circle as basis; if not, it should be a surface with
    % the same size as c1
    one_surface=numel(v2)==2;

    if ~(one_surface || ds_is_surface || isequal(size(v1),size(v2)))
        error('Size mismatch between surfaces: %dx%d != %dx%d',...
                    size(v1,1), size(v1,2),...
                    size(v2,1), size(v2,2));
    end

    surfing_check_surface(v1,f);
    if ~(one_surface || ds_is_surface)
        surfing_check_surface(v2,f);
    end

    if isempty(fo)
        if numel(vo)>1
            % if not a scaler (niter) then throw an error
            error('Topology missing for output surface');
        end
    else
        surfing_check_surface(vo,fo);
    end



function [v1,v2,f,vo,fo]=parse_surfs_arguments(ds_is_surface,surfs)
    if ~iscell(surfs)
        error('surfs argument must be a cell');
    end

    n=numel(surfs);

    % space for output
    v1=[];
    v2=[];
    f=[];
    vo=[];
    fo=[];

    for k=1:n
        s=surfs{k};
        if ischar(s)
            % filename; read the surface
            [c,f_]=surfing_read(s);
            if isempty(v1)
                v1=c;
                f=f_;
            elseif isempty(v2) && ~ds_is_surface
                if ~isequal(f_,f)
                    error('Topology mismatch between the two surfaces');
                end
                v2=c;
            elseif isempty(vo)
                vo=c;
                fo=f_;
            else
                error('Superfluous argument at position %d', k);
            end
        elseif isnumeric(s)
            % numeric array; coordinates or faces
            if isempty(v1)
                v1=s;
            elseif isempty(v2)
                v2=s;
            elseif isempty(f)
                f=s;
            elseif isempty(vo)
                vo=s;
            elseif isempty(fo)
                fo=s;
            else
                error('Superfluous argument at position %d', k);
            end
        else
            error('Expected surface filename or array at position %d', k);
        end
    end


function ds_type=get_ds_type(ds)

    supported_ds_types={'surface','fmri'};
    n=numel(supported_ds_types);

    for k=1:n
        supported_ds_type=supported_ds_types{k};
        if cosmo_check_dataset(ds,supported_ds_type,false)
            ds_type=supported_ds_type;
            return
        end
    end

    % maybe it is not a dataset at all, check this possibility
    cosmo_check_dataset(ds);

    % it is a valid dataset but not fmri or surface;
    % try to give an appropriate error message
    for k=1:n
        supported_ds_type=supported_ds_types{k};
        if is_ds_type_like(ds, supported_ds_type)
            % let it throw an error
            cosmo_check_dataset(ds,supported_ds_type);
        end
    end

    error('Unknown dataset type, supported are: %s.', ...
                    cosmo_strjoin(supported_ds_types,', '));


function tf=is_ds_type_like(ds, type)
    % helper function that uses heuristics to find type of dataset
    tf=false;

    switch type
        case 'surface'
            if cosmo_isfield(ds,'fa.node_indices')
                tf=true;
                return;
            end

            if cosmo_isfield(ds,'a.fdim.values') && ...
                        isequal({'node_indices'},ds.a.fdim.values)
                tf=true;
                return;
            end

        case 'fmri'
            if any(cosmo_isfield(ds,{'fa.i','fa.j','fa.k'}))
                tf=true;
                return;
            end

            if cosmo_isfield(ds,'a.fdim.values') && ...
                        any(cosmo_match({'i','j','k'},ds.a.fdim.values))
                tf=true;
                return
            end

        otherwise
            error('unsupported type %s', type);
    end

function check_input(surfs, varargin)
    % give deprecation notice
    if isnumeric(surfs)
        error(['Second argument must be a cell with a surface '...
                'definition (as of Jan 2015, the syntax for this '...
                'function has changed)']);
    end


function circle_def=get_circle_def(opt)
    metric2circle_def_func=struct();
    metric2circle_def_func.radius=@(x)x;
    metric2circle_def_func.count=@(x)[0 x]; % fixed initial radius
    metric2circle_def_func.area=@(x)[5 Inf x]; % default initial radius
    metric2circle_def_func.direct=@get_direct_circle_def;

    % options are mutually exclusive
    metrics=fieldnames(metric2circle_def_func);
    metric_msk=cosmo_isfield(opt,metrics);
    if sum(metric_msk)~=1
        error('Use one of these arguments to define neighbors: %s',...
                cosmo_strjoin(metrics, ', '));
    end

    metric=metrics{metric_msk};
    func=metric2circle_def_func.(metric);
    param=opt.(metric);
    if ~isscalar(param) || param<0
        error('value for ''%s'' must be a scalar not less than zero',...
                metric);
    end
    circle_def=func(opt.(metric));

function circle_def=get_direct_circle_def(radius)
    % false or 0 give a zero radius, otherwise NaN
    if isnan(radius) || radius
        circle_def=NaN;
    else
        circle_def=[5 1];
    end