cosmo sample unique skl

function samples=cosmo_sample_unique(k,n,count,varargin)
% sample without replacement from subsets of integers in balanced manner
%
% function samples=cosmo_sample_unique(k,n[,count,varargin])
%
% Inputs:
%   k           number of elements to return in each subset
%   n           size of integer range from which to sample
%   count       number of subsets to select (default: 1)
%   'seed', s   Use seed s for pseudo-random sampling (optional). If this
%               option is omitted, then different calls to this function
%               may (usually: will) return different results
%
%
% Output:
%   samples     k x count indices, all in the range 1:n, with the following
%               properties:
%               - each value is randomly sampled from the range 1:n
%               - each column forms a subset of 1:n (without repeats)
%               - across the entire matrix, each value in the range 1:n
%                 occurs approximately equally often
%
% Example:
%     % get 4 random subsets of 3 elements in range 1:7
%     % (in this example a seed is used to get the same result upon every
%     % function call)
%     cosmo_sample_unique(3,6,4,'seed',3)
%     %||      1     2     1     2
%     %||      3     5     3     4
%     %||      4     6     5     6
%
% Notes:
%   - this is a utility function; it does not work on dataset structures.
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #

    if nargin<3 || isempty(count)
        count=1;
    end

    ensure_scalar_nat({k,n,count});
    if k>n
        error(['First argument (%d) cannot be greater than '...
                    'second argument (%d)'],k,n);
    end

    opt=cosmo_structjoin(varargin{:});

    % random elements, each column is a permutation of 1:count
    rs_mat=random_permutations(n,count+1,opt);

    % vectorize
    rs=rs_mat(:);

    % allocate space for output
    samples=zeros(k,count);

    % keep track of which elements in rs were visited
    visited=false((k+1)*count,1);

    first_non_visited_pos=1;
    in_bin=false(n,k); % re-use this for each column
    for col=1:count
        % no elements added so far
        in_bin(:)=false;

        pos=first_non_visited_pos;
        for row=1:k
            % avoid duplicates in in_bin
            while visited(pos) || in_bin(rs(pos))
                pos=pos+1;
            end

            r=rs(pos);
            in_bin(r)=true;
            samples(row,col)=r;
            visited(pos)=true;
        end

        % update first_non_visited pos
        while visited(first_non_visited_pos)
            first_non_visited_pos=first_non_visited_pos+1;
        end
    end

    samples=sort(samples,1);

function r=random_permutations(n,count,opt)
    % output r has in each column the values 1:count in randomly permuted
    % order
    if isfield(opt,'seed') && ~isempty(opt.seed)
        args={'seed',opt.seed};
    else
        args={};
    end

    v=cosmo_rand(n,count,args{:});
    [unused,r]=sort(v,1);


function ensure_scalar_nat(vs)
    for k=1:numel(vs)
        v=vs{k};

        if ~(isnumeric(v) && isscalar(v) && v>0 && round(v)==v)
            error('Argument %d must be positive scalar integer',k);
        end
    end