function samples = cosmo_sample_unique(k, n, count, varargin)
% sample without replacement from subsets of integers in balanced manner
%
% function samples=cosmo_sample_unique(k,n[,count,varargin])
%
% Inputs:
% k number of elements to return in each subset
% n size of integer range from which to sample
% count number of subsets to select (default: 1)
% 'seed', s Use seed s for pseudo-random sampling (optional). If this
% option is omitted, then different calls to this function
% may (usually: will) return different results
%
%
% Output:
% samples k x count indices, all in the range 1:n, with the following
% properties:
% - each value is randomly sampled from the range 1:n
% - each column forms a subset of 1:n (without repeats)
% - across the entire matrix, each value in the range 1:n
% occurs approximately equally often
%
% Example:
% % get 4 random subsets of 3 elements in range 1:7
% % (in this example a seed is used to get the same result upon every
% % function call)
% cosmo_sample_unique(3,6,4,'seed',3)
% %|| 1 2 1 2
% %|| 3 5 3 4
% %|| 4 6 5 6
%
% Notes:
% - this is a utility function; it does not work on dataset structures.
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #
if nargin < 3 || isempty(count)
count = 1;
end
ensure_scalar_nat({k, n, count});
if k > n
error(['First argument (%d) cannot be greater than '...
'second argument (%d)'], k, n);
end
opt = cosmo_structjoin(varargin{:});
% random elements, each column is a permutation of 1:count
rs_mat = random_permutations(n, count + 1, opt);
% vectorize
rs = rs_mat(:);
% allocate space for output
samples = zeros(k, count);
% keep track of which elements in rs were visited
visited = false((k + 1) * count, 1);
first_non_visited_pos = 1;
in_bin = false(n, k); % reuse this for each column
for col = 1:count
% no elements added so far
in_bin(:) = false;
pos = first_non_visited_pos;
for row = 1:k
% avoid duplicates in in_bin
while visited(pos) || in_bin(rs(pos))
pos = pos + 1;
end
r = rs(pos);
in_bin(r) = true;
samples(row, col) = r;
visited(pos) = true;
end
% update first_non_visited pos
while visited(first_non_visited_pos)
first_non_visited_pos = first_non_visited_pos + 1;
end
end
samples = sort(samples, 1);
function r = random_permutations(n, count, opt)
% output r has in each column the values 1:count in randomly permuted
% order
if isfield(opt, 'seed') && ~isempty(opt.seed)
args = {'seed', opt.seed};
else
args = {};
end
v = cosmo_rand(n, count, args{:});
[unused, r] = sort(v, 1);
function ensure_scalar_nat(vs)
for k = 1:numel(vs)
v = vs{k};
if ~(isnumeric(v) && isscalar(v) && v > 0 && round(v) == v)
error('Argument %d must be positive scalar integer', k);
end
end