%% Double dipping
% Warning: this exercise shows the *bad* practice of double dipping
% (also known as circular analysis). You must never, ever use
% double dipping to interpret results for a real analysis that you
% would publish.
% Simulation parameters: pure-noise data, so true (chance) accuracy is 1/nclasses
nfeatures=100;
nsamples_per_class=200;
nclasses=2;
niter=1000;
% compute number of samples
nsamples=nclasses*nsamples_per_class;
% set targets: class labels 1..nclasses alternating, nsamples x 1
targets=repmat((1:nclasses)',nsamples_per_class,1);
% allocate space for output
% column 1: double-dipping accuracy; column 2: independent-test accuracy
accuracies=zeros(niter,2);
for iter=1:niter
    % generate random gaussian train data of size nsamples x nfeatures
    train_data=randn(nsamples,nfeatures);

    % for the double dipping test data, use the very same data that the
    % classifier is trained on.
    %
    % *** WARNING ***
    % For real data analyses (that you would publish in a paper) you
    % must never do double dipping analysis - its results are invalid
    % ****************
    double_dipping_test_data=train_data;

    % independent test data: fresh random gaussian data of the same size
    % as train_data, so test samples share no noise with the training set
    independent_test_data=randn(nsamples,nfeatures);

    % compute class label predictions for both test sets using an LDA
    % classifier trained on (train_data, targets)
    double_dipping_pred=cosmo_classify_lda(train_data,targets,...
                                            double_dipping_test_data);
    independent_pred=cosmo_classify_lda(train_data,targets,...
                                            independent_test_data);

    % compute classification accuracies (fraction of correct predictions)
    double_dipping_acc=mean(double_dipping_pred==targets);
    independent_acc=mean(independent_pred==targets);

    % store accuracies in the iter-th row of the 'accuracies' matrix
    accuracies(iter,:)=[double_dipping_acc, independent_acc];
end
% show histogram: one distribution per column of 'accuracies';
% double dipping is biased well above chance, independent centers on chance
hist(accuracies,100)
legend({'double dipping','independent'})