function [pred, accuracy] = cosmo_crossvalidate(ds, classifier, partitions, opt)
% performs cross-validation using a classifier
%
% [pred, accuracy] = cosmo_crossvalidate(dataset, classifier, partitions, opt)
%
% Inputs
% ds struct with fields .samples (PxQ for P samples and
% Q features) and .sa.targets (Px1 labels of samples)
% classifier function handle to classifier, e.g.
% @cosmo_classify_naive_bayes
% partitions For example the output from cosmo_nfold_partitioner
% opt optional struct with options for classifier
% .normalization optional, one of 'zscore','demean','scale_unit'
% to normalize the data prior to classification using
% zscoring, demeaning or scaling to [-1,1] along the
% first dimension of ds. Normalization
% parameters are estimated using the training data
% and applied to the testing data.
% .pca_explained_count optional, transform the data with PCA prior to
% classification, and retain this number of
% components
% .pca_explained_ratio optional, transform the data with PCA prior to
% classification, and retain the components that
% explain this proportion of the variance
% (value between 0 and 1)
% .check_partitions optional (default: true). If set to false then
% partitions are not checked for being set properly.
% .average_train_X average the samples in the train set using
% cosmo_average_samples. For X, use any parameter
% supported by cosmo_average_samples, i.e. either
% 'count' or 'ratio', and optionally, 'resamplings'
% or 'repeats'.
%
% Output
% pred PxN array with predicted class labels for P samples
% and N folds in partitions (one column per fold);
% elements with no predictions have the value NaN.
% accuracy scalar classification accuracy
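% test_chunks Px1 array with chunks of input dataset, if each
% prediction was based on a single classification
% step. Predictions with no or more than one
% classification step are set to NaN.
% NOTE(review): test_chunks is not among the outputs
% listed in the function signature above - confirm.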
%
% Examples:
% % generate dataset with 3 targets and 4 chunks, first target is 3
% ds=cosmo_synthetic_dataset('ntargets',3,'nchunks',4,'target1',3);
% % use take-1-chunk for testing crossvalidation
% partitions=cosmo_nfold_partitioner(ds);
% classifier=@cosmo_classify_naive_bayes;
% % run crossvalidation
% [pred,accuracy]=cosmo_crossvalidate(ds, classifier, ...
% partitions);
% % show targets, chunks, and predicted labels for each of the
% % four folds
% cosmo_disp({ds.sa.targets,ds.sa.chunks,pred},'threshold',inf)
% %|| { [ 3 [ 1 [ 3 NaN NaN NaN
% %|| 4 1 4 NaN NaN NaN
% %|| 5 1 5 NaN NaN NaN
% %|| 3 2 NaN 3 NaN NaN
% %|| 4 2 NaN 5 NaN NaN
% %|| 5 2 NaN 5 NaN NaN
% %|| 3 3 NaN NaN 3 NaN
% %|| 4 3 NaN NaN 4 NaN
% %|| 5 3 NaN NaN 5 NaN
% %|| 3 4 NaN NaN NaN 3
% %|| 4 4 NaN NaN NaN 4
% %|| 5 ] 4 ] NaN NaN NaN 5 ] }
% cosmo_disp(accuracy)
% %|| 0.917
% %
% % use take-2-chunks out for testing crossvalidation, LDA classifier
% partitions=cosmo_nchoosek_partitioner(ds,2);
% classifier=@cosmo_classify_lda;
% % run crossvalidation
% [pred,accuracy]=cosmo_crossvalidate(ds, classifier, ...
% partitions);
% % show targets, chunks, and predicted labels for each of the
% % six folds
% cosmo_disp({ds.sa.targets,ds.sa.chunks,pred},'threshold',inf)
% %|| { [ 3 [ 1 [ 5 5 3 NaN NaN NaN
% %|| 4 1 4 4 4 NaN NaN NaN
% %|| 5 1 5 5 4 NaN NaN NaN
% %|| 3 2 3 NaN NaN 3 3 NaN
% %|| 4 2 4 NaN NaN 4 4 NaN
% %|| 5 2 5 NaN NaN 4 5 NaN
% %|| 3 3 NaN 5 NaN 3 NaN 3
% %|| 4 3 NaN 4 NaN 4 NaN 4
% %|| 5 3 NaN 5 NaN 5 NaN 5
% %|| 3 4 NaN NaN 3 NaN 3 3
% %|| 4 4 NaN NaN 4 NaN 4 5
% %|| 5 ] 4 ] NaN NaN 5 NaN 3 3 ] }
% cosmo_disp(accuracy)
% %|| 0.778
% %
% % as the example above, but (1) use z-scoring on each training set
% % and apply the estimated mean and std to the test set, and (2)
% % use odd-even partitioner
% opt=struct();
% opt.normalization='zscore';
% partitions=cosmo_oddeven_partitioner(ds);
% % run crossvalidation
% [pred,accuracy]=cosmo_crossvalidate(ds, classifier, ...
% partitions, opt);
% % show targets, chunks, and predicted labels for each of the two folds
% cosmo_disp({ds.sa.targets,ds.sa.chunks,pred},'threshold',inf)
% %|| { [ 3 [ 1 [ NaN 5
% %|| 4 1 NaN 4
% %|| 5 1 NaN 5
% %|| 3 2 3 NaN
% %|| 4 2 4 NaN
% %|| 5 2 5 NaN
% %|| 3 3 NaN 5
% %|| 4 3 NaN 4
% %|| 5 3 NaN 5
% %|| 3 4 3 NaN
% %|| 4 4 4 NaN
% %|| 5 ] 4 ] 3 NaN ] }
% cosmo_disp(accuracy)
% %|| 0.75
%
% Notes:
% - to apply this to a dataset struct as a measure (for searchlights),
% consider using cosmo_crossvalidation_measure
% - to average samples in the training set prior to training, use the
% options provided by cosmo_average_samples prefixed by
% 'average_train_'. For example, to take averages of 5 samples, and use
% each sample in the input approximately 4 times, use:
% opt.average_train_count=5;
% opt.average_train_resamplings=4;
%
% See also: cosmo_crossvalidation_measure, cosmo_average_samples
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #