function d = cosmo_pdist(x, distance)
% compute pair-wise distance between samples in a matrix
%
% d=cosmo_pdist(x[, distance])
%
% Inputs:
% x PxM matrix for P samples and M features
% distance distance metric: one of 'euclidean' (default),
% 'correlation' (computing one-minus-correlation), or any
% other metric supported by matlab's built-in 'pdist'
%
% Outputs:
% d 1xN row vector with pairwise distances, where N=M*(M-1),
% containing the distances of the lower triangle of the
% distance matrix
%
% Examples:
% data=[1 4 3; 2 2 3; 4 2 0;0 1 1];
% % compute pair-wise distances with euclidean distance metric.
% % since there are 4 samples, there are 4*3/2=6 pairs of samples.
% d=cosmo_pdist(data);
% cosmo_disp(d);
% %|| [ 2.24 4.69 3.74 3.61 3 4.24 ]
% %
% % this gives the same output
% d=cosmo_pdist(data,'euclidean');
% cosmo_disp(d);
% %|| [ 2.24 4.69 3.74 3.61 3 4.24 ]
% %
% % compute distances with one-minus-correlation distance metric
% d_c=cosmo_pdist(data,'correlation');
% cosmo_disp(d_c);
% %|| [ 0.811 1.65 0.0551 1.87 0.5 1.87 ]
%
% Notes:
% - this function provides a native implementation for 'euclidean' and
% 'correlation'; other distance metrics require the pdist function
% supplied in the matlab stats toolbox, or the octave statistics
% package
% - the rationale for providing this function is to support pair-wise
% distances on platforms without the stats toolbox
% - to compute pair-wise distances on a dataset struct, use
% cosmo_dissimilarity_matrix_measure
% - the output of this function can be given to [cosmo_]squareform to
% recreate a PxP distance matrix
%
% See also: cosmo_dissimilarity_matrix_measure, cosmo_squareform
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #
if nargin < 2 || isempty(distance)
distance = 'euclidean';
end
ns = size(x, 1);
d = zeros(1, ns * (ns - 1) / 2);
switch distance
case 'euclidean'
pos = 0;
for k = 1:ns
ji = ((k + 1):ns);
nj = numel(ji);
idxs = pos + (1:nj);
delta = bsxfun(@minus, x(k, :), x(ji, :));
d_idxs = sqrt(sum(delta.^2, 2));
d(idxs) = d_idxs;
pos = pos + nj;
end
case 'correlation'
% it's faster to compute all correlations that just the lower
% diagonal
dfull = cosmo_corr(x');
ns_rng = 1:ns;
% make diagonal mask
msk = bsxfun(@gt, ns_rng', ns_rng);
d = 1 - dfull(msk)';
otherwise
is_octave = cosmo_wtf('is_octave');
if is_octave || cosmo_check_external('@stats', false)
d = pdist(x, distance);
else
error(['Matlab requires the stats toolbox for distance '...
'metric %s'], distance);
end
end