function ranks=cosmo_tiedrank(data, dim)
% Compute ranks for the input along the specified dimension
%
% ranks=cosmo_tiedrank(data[, dim])
%
% Inputs:
% data numeric N-dimensional array
% dim optional dimension along which the ranks
% are computed (default: 1)
%
% Output:
% ranks numeric N-dimensional array with the same
% size as the input containing the rank of
% each vector along the dim-th dimension.
% Equal values have the same rank, which is
% the average of the rank the values would
% have if they differed by a minimal amount.
% NaN values in the input result in a NaN
% values in the output at the corresponding
% locations.
% If dim is greater than the number of
% dimensions in data, then all values in rank
% are one (or NaN of the corresponding value
% in data is NaN).
%
% Examples:
% cosmo_tiedrank([1 2 2],2)
% %|| [ 1 2.5 2.5]
%
% cosmo_tiedrank([NaN 2 2;3 NaN 4],1)
% %|| [ NaN 1 1;
% %|| 1 NaN 2];
%
% cosmo_tiedrank([NaN 2 2;3 NaN 4],2)
% %|| [ NaN 1.5 1.5;
% %|| 1 NaN 2];
%
% cosmo_tiedrank([2 4 3 3 3 3 5 5 5],2)
% %|| [ 1.0 6.0 3.5 3.5 3.5 3.5 8.0 8.0 8.0 ]
%
% Notes:
% - Unlike the Matlab builtin function 'tiedrank' (part of the statistics
% toolbox), the meaning of the second argument is the dimension along
% which the ranks are computed.
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #
if nargin<2
dim=1;
end
check_inputs(data, dim);
orig_size=size(data);
if numel(orig_size)<dim
% if the input data does not have enough data, then the output
% consists of an array with only ones (or NaNs, if present)
ranks=singleton_ranks(data);
return;
end
[values,idx]=sort(data,dim);
data_is_vector=numel(orig_size)<=2 && orig_size(3-dim)==1;
if data_is_vector
ranks=vector_tied_rank(values(:), idx(:));
if orig_size(1)==1
% transpose to turn it back into a row vector
ranks=ranks';
end
return
end
% make the dim-th dimension the first dimension
values_sh=shiftdim(values,dim-1);
idx_sh=shiftdim(idx,dim-1);
sh_size=size(values_sh);
count_along_dim=size(values_sh,1);
% reshape into a matrix
values_mat=reshape(values_sh,count_along_dim,[]);
idx_mat=reshape(idx_sh,count_along_dim,[]);
% space for output
ranks_mat=zeros(size(idx_mat));
% compute for each column vector
n_col=size(ranks_mat,2);
for k=1:n_col
ranks_mat(:,k)=vector_tied_rank(values_mat(:,k),idx_mat(:,k));
end
% put back in shape after shiftdim
ranks_sh=reshape(ranks_mat,sh_size);
% undo shiftdim
unshift_count=numel(orig_size)-dim+1;
ranks=reshape(shiftdim(ranks_sh,unshift_count),orig_size);
function ranks=vector_tied_rank(sorted_values, sort_idx)
% sorted_values and sort_idx are the output from 'sort'
% it is assumes that sorted_values is a vector
n_values=numel(sorted_values);
nan_msk=isnan(sorted_values);
nan_count=sum(nan_msk);
non_nan_count=numel(sorted_values)-nan_count;
% first set ranks for values without ties
ranks=sort_idx+NaN;
ranks(sort_idx(1:non_nan_count))=1:non_nan_count;
% now deal with ties
tie_msk=sorted_values(2:end)==sorted_values(1:(end-1));
tie_idx=find(tie_msk);
tie_count=numel(tie_idx);
k=0;
while k<tie_count
k=k+1;
tie_start=tie_idx(k);
tie_end=tie_start+1;
while tie_end<n_values ...
&& sorted_values(tie_end)==sorted_values(tie_end+1)
tie_end=tie_end+1;
k=k+1;
end
tie_value=(tie_start+tie_end)/2;
pos=tie_start+(0:(tie_end-tie_start));
ranks(sort_idx(pos))=tie_value;
end
function ranks=singleton_ranks(data)
% all ranks are either NaN or 1
ranks=ones(size(data));
ranks(isnan(data))=NaN;
function check_inputs(data, dim)
if ~isnumeric(data)
error('First input must be numeric')
end
if ~(isnumeric(dim) ...
&& isscalar(dim) ...
&& round(dim)==dim ...
&& dim>0)
error('Second argument must be numeric integer');
end