function cval=cosmo_measure_clusters(sample,nbrhood_mat,cluster_stat,varargin)
% General cluster measure
%
% cval=cosmo_measure_clusters(sample,nbrhood_mat,method,['dh',dh|'threshold',t],...)
%
% Inputs:
% sample 1xP data array for k features
% nbrhood_mat MxP neighborhood matrix, if each feature has at
% most M neighbors. nbrhood_mx(:,k) should contain
% the feature ids of the neighbors of feature k
% (values of zero indicate no neighbors)
% cluster_stat 'tfce', 'max', 'maxsize', or 'maxsum'.
% 'dh',dh when method is 'tfce', the integral step size
% 'threshold',t if method is anything but 'tfce', the threshold
% level
% 'E', E (optional) when method is 'tfce': the extent
% exponent, default E=0.5
% 'H', E (optional) when method is 'tfce': the height
% exponent, default H=2
% 'feature_sizes', s (optional) 1xP element size of each feature;
% default is a vector with all elements equal to 1.
%
% Output:
% cval 1xP data array with cluster values.
% Define:
% cl_k(thr) Feature ids that are connected
% with feature k when sample is
% thresholded at thr.
% (with cl_k(thr)=[] if sample(k)<thr).
% e_k(thr) The extent of the cluster for feature
% k when thresholded at thr, i.e.
% e_k=sum[i=cl_k(thr)] s(i)
% max0[x=xs]v(x) the maximum value that v takes for
% any value in xs, or zero if xs is
% empty.
% The output for a feature k depends on cluster_stat:
% - 'tfce':
% cval(k)=sum[h=dh:dh:max(sample)] e_k(h)^E *h^H * dh
% i.e. a weighted sum of the extent and height of
% the cluster at different thresholds
% - 'max':
% cval(k)=max0[i=cl_k(thr)] samples(i))
% i.e. the maximum value in the cluster that contains k
% - 'maxsize'
% cval(k)=e_k(thr)
% i.e. the size of the cluster that contains k
% - 'maxsum'
% cval(k)=sum[i=cl_k(thr)] samples(i)
% i.e. the sum of the values in the cluster that
% contains k
%
% Examples:
% % very simple one-dimensional 'line' example
% samples= [1 2 1 1 0 2 2];
% nbrhood_mat= [1 1 2 3 4 5 6;...
% 2 2 3 4 5 6 7;...
% 0 3 4 5 6 7 0];
% % illustrate TFCE cluster stat. Note that the results vary little
% % as a function of dh, and that both features in clusters with a
% % larger extent but less extreme value and features in clusters with
% % smaller extent but more extreme values have larger values
% s=cosmo_measure_clusters(samples,nbrhood_mat,'tfce','dh',.1);
% cosmo_disp(s,'threshold',inf)
% %|| [ 0.77 2.86 0.77 0.77 0 3.49 3.49 ]
% %
% s=cosmo_measure_clusters(samples,nbrhood_mat,'tfce','dh',.05);
% cosmo_disp(s,'threshold',inf)
% %|| [ 0.618 2.88 0.618 0.618 0 3.63 3.63 ]
% %
% % illustrate other cluster stats. Note that the results varies more
% % as a function of the chosen threshold
% cosmo_measure_clusters(samples,nbrhood_mat,'max','threshold',1)
% %|| 2 2 2 2 0 2 2
% %
% cosmo_measure_clusters(samples,nbrhood_mat,'max','threshold',2)
% %|| 0 2 0 0 0 2 2
% %
% cosmo_measure_clusters(samples,nbrhood_mat,'maxsize','threshold',1)
% %|| 4 4 4 4 0 2 2
% %
% cosmo_measure_clusters(samples,nbrhood_mat,'maxsize','threshold',2)
% %|| 0 1 0 0 0 2 2
% %
% cosmo_measure_clusters(samples,nbrhood_mat,'maxsum','threshold',1)
% %|| 5 5 5 5 0 4 4
% %
% cosmo_measure_clusters(samples,nbrhood_mat,'maxsum','threshold',2)
% %|| 0 2 0 0 0 4 4
%
% Notes:
% - the 'method' argument is similar to FieldTrip's clusterstat method,
% but does support 'tfce' and does not support 'wcm'
% - unlike FieldTrip's clusterstat, a value is returned for each feature
% (rather than each cluster).
% - TFCE is advised in the general case, because it finds a compromise
% between magnitude of values and extent of clusters.
% - this function is used by cosmo_montecarlo_cluster_stat for
% non-parametric cluster-based correction for multiple comparisons.
%
% References:
% - Stephen M. Smith, Thomas E. Nichols (2009), Threshold-free
% cluster enhancement: Addressing problems of smoothing, threshold
% dependence and localisation in cluster inference, NeuroImage, Volume
% 44, 83-98.
% - Maris, E., Oostenveld, R. Nonparametric statistical testing of EEG-
% and MEG-data. Journal of Neuroscience Methods (2007).
%
% See also: cosmo_convert_neighborhood, cosmo_montecarlo_cluster_stat
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #
persistent cached_varargin;
persistent cached_opt;
if isequal(varargin, cached_varargin)
opt=cached_opt;
else
opt=cosmo_structjoin(varargin);
cached_opt=opt;
cached_varargin=varargin;
end
% used for TFCE
defaults=struct();
defaults.E=.5;
defaults.H=2;
% get size of each feature
feature_sizes=get_feature_sizes(sample,opt);
% check input size
check_input_sizes(sample,nbrhood_mat);
% set cluster parameters depending on method; outputs are:
% delta_thr value by which the threshold is increased for every
% iteration (in case of TFCE), or the threshold value
% itself (for any other method)
% cluster_func function handle which takes as inputs:
% idxs feature ids of elements in a cluster
% sizes size of each feature; same size as data
% thr threshold value
% nthr number of thresholds (1 for any method except for
% TFCE)
[delta_thr,cluster_func,nthr,has_inf]=get_clustering_params(...
cluster_stat,...
sample,...
feature_sizes,...
opt,defaults);
% initially all features survive the mask, except non-finite ones
super_threshold_msk=~isnan(sample);
infinity_msk=isinf(sample) & sample>0;
% keep
thr_counter=0;
% cluster threshold variable. For TFCE this value is increased multiple
% times in the while-loop below. For all other methods it is increased
% just once
thr=0;
% allocate space for output
cval=zeros(size(sample));
while thr_counter<nthr
% increase threshold
thr=thr+delta_thr;
% update mask of features above threshold
super_threshold_msk=super_threshold_msk & sample>=thr;
if has_inf
% if output can contain infinity, break if all values are less
% than the threshold or inifity
if ~any(super_threshold_msk & ~infinity_msk)
% no features surive, clustering is completed
break;
end
else
% if output cannot contain infinity, break if all values are
% less than the threshold
if ~any(super_threshold_msk)
break;
end
end
% apply clustering
clusters=cosmo_clusterize(super_threshold_msk,nbrhood_mat);
% process each cluster
for k=1:numel(clusters)
cluster=clusters{k};
stat=cluster_func(cluster,thr);
cval(cluster)=cval(cluster)+stat;
end
% increase the threshold counter. For all methods except tfce, the
% while loop is quit after the first iteration
thr_counter=thr_counter+1;
end
if has_inf
cval(infinity_msk)=Inf;
end
function [delta_thr,cluster_func,nthr,has_inf]=get_clustering_params(...
method,...
sample,sizes,...
opt,defaults)
switch method
case 'tfce'
delta_thr=get_tfce_threshold_height(opt);
cluster_func=get_tfce_cluster_func(sizes,delta_thr,...
opt,defaults);
nthr=Inf;
has_inf=true;
case 'max'
cluster_func=@(idxs,thr) max(sample(idxs));
delta_thr=get_fixed_threshold_height(opt);
nthr=1;
has_inf=true;
case 'maxsize'
cluster_func=@(idxs,thr) sum(sizes(idxs));
delta_thr=get_fixed_threshold_height(opt);
nthr=1;
has_inf=false;
case 'maxsum'
cluster_func=@(idxs,thr) sum(sample(idxs));
delta_thr=get_fixed_threshold_height(opt);
nthr=1;
has_inf=true;
otherwise
error('illegal method ''%s''', method);
end
function feature_sizes=get_feature_sizes(sample,opt)
nfeatures=size(sample,2);
if isfield(opt,'feature_sizes')
% if given as option, use those
feature_sizes=opt.feature_sizes;
% ensure it is a row vector
if ~isequal(size(feature_sizes),[1,nfeatures])
error('feature sizes must be of size 1x%d', nfeatures);
end
else
feature_sizes=ones(1,nfeatures);
end
function func=get_tfce_cluster_func(sizes,threshold_height,opt,defaults)
% helper to get the cluster function for TFCE, using either supplied or
% default values for the E and H paramters
persistent cached_opt;
persistent cached_defaults;
persistent cached_opt_all;
if isequal(opt,cached_opt) && isequal(defaults, cached_defaults)
opt_all=cached_opt_all;
else
opt_all=cosmo_structjoin(defaults,opt);
cached_opt_all=opt_all;
cached_opt=opt;
cached_defaults=defaults;
end
tfce_E=opt_all.E;
tfce_H=opt_all.H;
check_positive_scalar(tfce_E,'E');
check_positive_scalar(tfce_H,'H');
func=@(idxs,thr) sum(sizes(idxs))^tfce_E ...
* thr^tfce_H * threshold_height;
function dh=get_fixed_threshold_height(opt)
% for everything except TFCE
if ~isfield(opt,'threshold')
error('missing option ''threshold''');
end
if any(cosmo_isfield(opt,{'E','B','dh'}))
error(['options ''dh'', ''E'' or ''B'' are not allowed '...
'for this method']);
end
dh=opt.threshold;
if ~isscalar(dh) || ~isfinite(dh)
error('option ''threshold'' must be a finite scalar');
end
function dh=get_tfce_threshold_height(opt)
if isfield(opt,'threshold')
error('option ''threshold'' is not allowed for this method');
end
if ~isfield(opt,'dh')
error('missing option ''dh'' for ''tfce'' method');
end
dh=opt.dh;
check_positive_scalar(dh,'dh');
function check_positive_scalar(value, name)
if ~isscalar(value) || ~isfinite(value) || value<=0
error('option ''%s'' must be a positive scalar', name);
end
function check_input_sizes(sample,nbrhood_mx)
if size(sample,1)~=1
error('sample input must be a row vector');
end
if ~isnumeric(sample) && ~islogical(sample)
error('sample input must be numeric or logical');
end
if numel(size(nbrhood_mx))~=2
error(['neighborhood matrix must be a matrix '...
'(use cosmo_convert_neighborhood to convert a '...
'neighborhood struct to matrix representation']);
end
if ~isnumeric(nbrhood_mx)
error('neighborhood matrix must be numeric');
end
% ensure they have matching size
if numel(sample)~=size(nbrhood_mx,2)
error(['size mismatch between data (%d elements) '...
'and neighborhood (%d elements)'],...
numel(sample),size(nbrhood_mx,2));
end