function [pca_samples,params]=cosmo_pca(samples,retain)
% Principal Component Analysis
%
% [pca_samples,params]=cosmo_pca(samples[,retain])
%
% Input:
% samples M x N numeric matrix
% retain (optional) number of components to retain;
% must be less than or equal to N. Default: N
%
% Output:
% pca_samples M x retain samples in Principal Component
% space, after samples have been centered
% params struct with fields:
% .coef M x retain Principal Component coefficients
% .mu M x 1 column-wise average of samples
% It holds that:
% samples=bsxfun(@plus,params.mu,...
% pca_samples*params.coef')
% .explained 1 x N Percentage of explained variance
%
%
% Examples:
% samples=[ 2.0317 -0.8918 -0.8258;...
% 0.5838 1.8439 1.1656;...
% -1.4437 -0.2617 -1.9207;...
% -0.5177 2.3387 0.4412;...
% 1.1908 -0.2040 -0.2088;...
% -1.3265 2.7235 0.1476];
% %
% % apply PCA, keeping two dimensions
% [pca_samples,params]=cosmo_pca(samples,2);
% %
% % show samples in PC space
% cosmo_disp(pca_samples);
% %|| [ -2.64 0.654
% %|| 0.923 1.43
% %|| -0.723 -2.48
% %|| 1.64 0.265
% %|| -1.46 0.569
% %|| 2.27 -0.438 ]
% %
% % show parameters
% cosmo_disp(params);
% %|| .coef
% %|| [ -0.512 0.744
% %|| 0.794 0.219
% %|| 0.328 0.632 ]
% %|| .mu
% %|| [ 0.0864 0.925 -0.2 ]
% %|| .explained
% %|| [ 66
% %|| 33.3
% %|| 0.676 ]
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #
if nargin<2
retain=[];
end
verify_parameters(samples,retain)
ndim=get_number_of_components(samples,retain);
% subtract mean
mu=mean(samples,1);
samples_demu=bsxfun(@minus, samples, mu);
% singular value decomposition
[u,s,w]=svd(samples_demu,'econ');
% extract eigen values
[nrow,ncol]=size(samples);
samples_is_vector=nrow==1 || ncol==1;
if samples_is_vector
% single eigen value
eigvals=s(1);
else
% take diagonal
eigvals=diag(s);
end
if ndim==0
% seperate case for zero dimensions
pca_samples=zeros(1,0);
coef=zeros(ncol,0);
else
pca_samples_rand_sign=bsxfun(@times,u(:,1:ndim),eigvals(1:ndim)');
[coef,sgn]=max_abs_positive_columnwise(w(:,1:ndim));
pca_samples=bsxfun(@times,pca_samples_rand_sign,sgn);
end
% store coefficients
params=struct();
params.coef=coef;
params.mu=mu;
nexpl=min([nrow-1,ncol]);
if nrow==1 || ncol==0
% special case for empty vecto with explained variance
params.explained=zeros(1,0);
else
explained_ratio=eigvals'.^2;
params.explained=100*explained_ratio(1:nexpl)/sum(explained_ratio);
end
function ncomp=get_number_of_components(samples,retain)
max_retain=size(samples,2);
if isempty(retain)
retain=max_retain;
end
if retain>max_retain
error('retain argument %d must be less than %d',...
retain,max_retain);
end
[nrow,ncol]=size(samples);
ncomp=min([nrow-1,ncol,retain]);
function [coef_pos,sgn]=max_abs_positive_columnwise(coef)
% swap sign for each column in which the maximum absolute value is
% negative. sgn contains the sign used in each column (-1 or 1)
[unused,i]=max(abs(coef),[],1);
[nrows,ncols]=size(coef);
mx_idx=(0:(ncols-1))*nrows+i;
mx=coef(mx_idx);
sgn=(mx>0)*2-1;
coef_pos=bsxfun(@times,coef,sgn);
function verify_parameters(samples,retain)
if ~isnumeric(samples)
error('samples argument must be numeric');
end
if numel(size(samples))>2
error('samples argument must be a matrix');
end
if ~(isempty(retain) || ...
(isscalar(retain) && ...
retain>0 && ...
isequal(round(retain),retain)))
error('retain argument must be positive integer');
end