function [winners,classes]=cosmo_winner_indices(pred)
% Given multiple predictions, get indices that were predicted most often.
%
% [winners,classes]=cosmo_winner_indices(pred)
%
% Input:
%   pred       PxQ prediction values for Q features and P
%              predictions per feature. Values of NaN are ignored,
%              i.e. can never be a winner.
%
% Output:
%   winners    Px1 indices of classes that occur most often.
%              winners(k)==w means that no value in
%              classes(pred(k,:)) occurs more often than classes(w).
%   classes    The sorted list of unique predicted values, across
%              all non-ignored (non-NaN) values in pred.
%
% Examples:
%     % a single prediction, with the third one missing
%     pred=[4; 4; NaN; 5];
%     [p, c]=cosmo_winner_indices(pred);
%     p'
%     %|| [1 1 NaN 2]
%     c'
%     %|| [4, 5]
%
%     % one prediction per fold (e.g. using cosmo_nfold_partitioner)
%     pred=[4 NaN NaN; 6 NaN NaN; NaN 3 NaN; NaN NaN NaN; NaN NaN 3];
%     [p, c]=cosmo_winner_indices(pred);
%     p'
%     %|| [2, 3, 1, NaN, 1]
%     c'
%     %|| [3 4 6]
%
%     % given up to three predictions each for eight samples, compute
%     % which predictions occur most often. NaNs are ignored.
%     pred=[4 4 4;4 5 6;6 5 4;5 6 4;4 5 6; NaN NaN NaN; 6 0 0;0 0 NaN];
%     [p, c]=cosmo_winner_indices(pred);
%     p'
%     %|| [2, 3, 4, 2, 3, NaN, 1, 1]
%     c'
%     %|| [0, 4, 5, 6]
%
% Notes:
%  - The typical use case is combining results from multiple classification
%    predictions, such as in binary support vector machines (SVMs) and
%    cosmo_crossvalidate
%  - The current implementation selects a winner pseudo-randomly (but
%    deterministically) and (presumably) unbiased in case of a tie between
%    multiple winners. That is, using the present implementation, repeatedly
%    calling this function with identical input yields identical output,
%    but unbiased with respect to which class is the 'winner' sample-wise.
%  - Samples with no predictions are assigned a value of NaN.
%
% See also: cosmo_classify_matlabsvm, cosmo_crossvalidate
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #
[nsamples,nfeatures]=size(pred);
pred_msk=~isnan(pred);

% allocate space for output; samples that never receive a winner stay NaN
winners=NaN(nsamples,1);

if nfeatures==1
    % single column of predictions; handle separately (fast path)
    [classes,unused,pred_idxs]=unique(pred(pred_msk));
    winners(pred_msk)=pred_idxs;
    return
end

sample_pred_count=sum(pred_msk,2);
sample_pred_msk=sample_pred_count>0;

if max(sample_pred_count)<=1
    % at most one prediction per sample; set non-predictions to zero and
    % sum each row to recover that single prediction
    pred(~pred_msk)=0;
    pred_merged=sum(pred(sample_pred_msk,:),2);
    [classes,unused,pred_idxs]=unique(pred_merged);
    winners(sample_pred_msk)=pred_idxs;
    return
end

% general case: multiple predictions per sample
classes=unique(pred(pred_msk));

% see how often each class was predicted, per sample
% (NaN values fall outside the edges, so histc ignores them)
counts=histc(pred,classes,2);
[max_count,idx]=max(counts,[],2);
nwinners=sum(bsxfun(@eq,max_count,counts),2);

% deal with single winners.
% Restricting to sample_pred_msk is required for correctness: a sample with
% zero predictions has an all-zero counts row, and if there is exactly one
% class overall then nwinners==1 for that sample, which would wrongly assign
% it winner index 1 instead of leaving it NaN.
single_winner_msk=sample_pred_msk & nwinners==1;
winners(single_winner_msk)=idx(single_winner_msk);

% remove the single winners from samples still to consider; samples with
% zero predictions are already excluded (sample_pred_msk is false there)
sample_pred_msk(single_winner_msk)=false;

% break remaining ties pseudo-randomly but deterministically: an increasing
% seed cycles through the tied candidates via mod
seed=0;
for k=find(sample_pred_msk)'
    tied_idxs=find(counts(k,:)==max_count(k));
    ntied=numel(tied_idxs);
    seed=seed+1;
    winners(k)=tied_idxs(mod(seed,ntied)+1);
end