cosmo sample unique¶

function samples = cosmo_sample_unique(k, n, count, varargin)
    % sample without replacement from subsets of integers in balanced manner
    %
    % function samples=cosmo_sample_unique(k,n[,count,varargin])
    %
    % Inputs:
    %   k           number of elements to return in each subset
    %   n           size of integer range from which to sample
    %   count       number of subsets to select (default: 1)
    %   'seed', s   Use seed s for pseudo-random sampling (optional). If this
    %               option is omitted, then different calls to this function
    %               may (usually: will) return different results
    %
    %
    % Output:
    %   samples     k x count indices, all in the range 1:n, with the following
    %               properties:
    %               - each value is randomly sampled from the range 1:n
    %               - each column forms a subset of 1:n (without repeats)
    %               - across the entire matrix, each value in the range 1:n
    %                 occurs approximately equally often
    %
    % Example:
    %     % get 4 random subsets of 3 elements in range 1:7
    %     % (in this example a seed is used to get the same result upon every
    %     % function call)
    %     cosmo_sample_unique(3,6,4,'seed',3)
    %     %||      1     2     1     2
    %     %||      3     5     3     4
    %     %||      4     6     5     6
    %
    % Notes:
    %   - this is a utility function; it does not work on dataset structures.
    %
    % #   For CoSMoMVPA's copyright information and license terms,   #
    % #   see the COPYING file distributed with CoSMoMVPA.           #

    if nargin < 3 || isempty(count)
        count = 1;
    end

    ensure_scalar_nat({k, n, count});
    if k > n
        error(['First argument (%d) cannot be greater than '...
               'second argument (%d)'], k, n);
    end

    opt = cosmo_structjoin(varargin{:});

    % random elements, each column is a permutation of 1:count
    rs_mat = random_permutations(n, count + 1, opt);

    % vectorize
    rs = rs_mat(:);

    % allocate space for output
    samples = zeros(k, count);

    % keep track of which elements in rs were visited
    visited = false((k + 1) * count, 1);

    first_non_visited_pos = 1;
    in_bin = false(n, k); % reuse this for each column
    for col = 1:count
        % no elements added so far
        in_bin(:) = false;

        pos = first_non_visited_pos;
        for row = 1:k
            % avoid duplicates in in_bin
            while visited(pos) || in_bin(rs(pos))
                pos = pos + 1;
            end

            r = rs(pos);
            in_bin(r) = true;
            samples(row, col) = r;
            visited(pos) = true;
        end

        % update first_non_visited pos
        while visited(first_non_visited_pos)
            first_non_visited_pos = first_non_visited_pos + 1;
        end
    end

    samples = sort(samples, 1);

function r = random_permutations(n, count, opt)
    % output r has in each column the values 1:count in randomly permuted
    % order
    if isfield(opt, 'seed') && ~isempty(opt.seed)
        args = {'seed', opt.seed};
    else
        args = {};
    end

    v = cosmo_rand(n, count, args{:});
    [unused, r] = sort(v, 1);

function ensure_scalar_nat(vs)
    for k = 1:numel(vs)
        v = vs{k};

        if ~(isnumeric(v) && isscalar(v) && v > 0 && round(v) == v)
            error('Argument %d must be positive scalar integer', k);
        end
    end
cosmo sample unique¶

Previous topic

Next topic

This Page