%% Run bad double-dipping analysis (skeleton exercise)

%% Double dipping

% Warning: this exercise shows the *bad* practice of double dipping
% (also known as circular analysis). You must never, ever use
% double dipping to interpret results for a real analysis that you
% would publish.

% simulation parameters
nfeatures=100;              % number of features per sample
nclasses=2;                 % number of classes
nsamples_per_class=200;     % number of samples in each class
niter=1000;                 % number of simulation iterations

% total number of samples across all classes
nsamples=nsamples_per_class*nclasses;

% class labels, alternating 1,2,1,2,... (column vector of length nsamples)
targets=mod((0:(nsamples-1))',nclasses)+1;

% preallocate output; one row per iteration, one column per analysis
% (see legend below: double dipping, independent)
accuracies=zeros(niter,2);

% run the simulation niter times; each iteration compares classification
% accuracy on the training data itself (double dipping) versus on
% independently generated data with no true class structure
for iter=1:niter
    % generate random gaussian train data of size nsamples x nfeatures
    % assign the result to a variable 'train_data'
    %%%% >>> Your code here <<< %%%%

    % for the double dipping test data, assign 'double_dipping_test_data'
    % to be the same as the training data.
    %
    % *** WARNING ***
    % For real data analyses (that you would publish in a paper) you
    % must never do double dipping analysis - its results are invalid
    % ****************
    %%%% >>> Your code here <<< %%%%

    % for the independent data, generate random gaussian data (of the
    % same size as train_data) and assign to a variable
    % 'independent_test_data'
    %%%% >>> Your code here <<< %%%%

    % compute class labels predictions for both test sets using
    % cosmo_classify_lda. Store the predictions in
    % 'double_dipping_pred' and 'independent_pred', respectively
    % NOTE(review): presumably called as
    % cosmo_classify_lda(train_data,targets,test_data) - confirm the
    % exact signature against the CoSMoMVPA documentation
    %%%% >>> Your code here <<< %%%%

    % compute classification accuracies
    % (predictions must be nsamples x 1 column vectors so that the
    % element-wise comparison with 'targets' is valid)
    double_dipping_acc=mean(double_dipping_pred==targets);
    independent_acc=mean(independent_pred==targets);

    % store accuracies in the iter-th row of the 'accuracies' matrix
    % (column 1: double dipping, column 2: independent - matching the
    % legend of the histogram plotted below)
    %%%% >>> Your code here <<< %%%%
end

% visualize the distribution of accuracies for both analyses;
% hist plots each column of the accuracies matrix separately
nbins=100;
hist(accuracies,nbins)
legend({'double dipping','independent'})