function ds_dsm = cosmo_dissimilarity_matrix_measure(ds, varargin)
% Compute a dissimilarity matrix measure
%
% ds_dsm = cosmo_dissimilarity_matrix_measure(ds[, varargin])
%
% Inputs:
%   ds              dataset struct with fields .samples (PxQ) and
%                   .sa.targets (Px1) for P samples and Q features.
%                   Each target must occur exactly once.
%   varargin        optional arguments, given as key-value pairs or as a
%                   struct, with:
%     .metric       a string with the name of the distance metric to be
%                   used by pdist (default: 'correlation')
%     .center_data  if true, the data is centered before the pair-wise
%                   distances are computed. The default is false; when
%                   using the 'correlation' metric it is recommended to
%                   set center_data to true.
%
% Output:
%   ds_dsm          dataset struct with fields:
%     .samples      Nx1 flattened lower triangle of the dissimilarity
%                   matrix as returned by [cosmo_]pdist, where
%                   N=P*(P-1)/2 is the number of pairwise distances
%                   between all samples in the dataset.
%     .sa           struct with fields:
%       .targets1 } Nx1 vectors with the row and column indices of the
%       .targets2 } lower part of the square form of the dissimilarity
%                   matrix: if .targets1(k)==i and .targets2(k)==j, then
%                   .samples(k) is the dissimilarity between the i-th
%                   and j-th sample (target).
%     .a.sdim       struct with fields .labels ({'targets1','targets2'})
%                   and .values (the targets in ds, twice), which allows
%                   the output to be unflattened with cosmo_unflatten.
%
%
% Example:
% % ds is a dataset struct with ds.sa.targets=(11:15)';
% ds=struct();
% ds.samples=[1 2 3; 1 2 3; 1 0 1; 1 1 2; 1 1 2];
% ds.sa.targets=(11:15)';
% %
% % compute dissimilarity with centered data
% dsm_ds=cosmo_dissimilarity_matrix_measure(ds,'center_data',true);
% cosmo_disp(dsm_ds);
% %|| .sa
% %|| .targets1
% %|| [ 2
% %|| 3
% %|| 4
% %|| :
% %|| 4
% %|| 5
% %|| 5 ]@10x1
% %|| .targets2
% %|| [ 1
% %|| 1
% %|| 1
% %|| :
% %|| 3
% %|| 3
% %|| 4 ]@10x1
% %|| .a
% %|| .sdim
% %|| .labels
% %|| { 'targets1' 'targets2' }
% %|| .values
% %|| { [ 11 [ 11
% %|| 12 12
% %|| 13 13
% %|| 14 14
% %|| 15 ] 15 ] }
% %|| .samples
% %|| [ 0
% %|| 2
% %|| 2
% %|| :
% %|| 1.11e-16
% %|| 1.11e-16
% %|| -2.22e-16 ]@10x1
% %
% % map results to matrix. values of 0 mean perfect correlation
% [samples, labels, values]=cosmo_unflatten(dsm_ds,1,...
% 'set_missing_to',NaN);
% cosmo_disp(samples)
% %|| [ NaN NaN NaN NaN NaN
% %|| 0 NaN NaN NaN NaN
% %|| 2 2 NaN NaN NaN
% %|| 2 2 1.11e-16 NaN NaN
% %|| 2 2 1.11e-16 -2.22e-16 NaN ]
% %
% cosmo_disp(labels)
% %|| { 'targets1' 'targets2' }
% %
% cosmo_disp(values)
% %|| { [ 11 [ 11
% %|| 12 12
% %|| 13 13
% %|| 14 14
% %|| 15 ] 15 ] }
%
% % Searchlight using this measure
% ds=cosmo_synthetic_dataset('ntargets',6,'nchunks',1);
% % (in this toy example there are only 6 voxels, and the radius
% % of the searchlight is 1 voxel. Real-life examples use larger
% % datasets and a larger radius)
% nbrhood=cosmo_spherical_neighborhood(ds,'radius',1,'progress',false);
% opt=struct();
% opt.progress=false; % do not show progress
% opt.metric='euclidean'; % (instead of default 'correlation')
% measure=@cosmo_dissimilarity_matrix_measure;
% sl_ds=cosmo_searchlight(ds, nbrhood, measure, opt);
% cosmo_disp(sl_ds);
% %|| .a
% %|| .fdim
% %|| .labels
% %|| { 'i' 'j' 'k' }
% %|| .values
% %|| { [ 1 2 3 ] [ 1 2 ] [ 1 ] }
% %|| .vol
% %|| .mat
% %|| [ 2 0 0 -3
% %|| 0 2 0 -3
% %|| 0 0 2 -3
% %|| 0 0 0 1 ]
% %|| .dim
% %|| [ 3 2 1 ]
% %|| .xform
% %|| 'scanner_anat'
% %|| .sdim
% %|| .labels
% %|| { 'targets1' 'targets2' }
% %|| .values
% %|| { [ 1 [ 1
% %|| 2 2
% %|| 3 3
% %|| 4 4
% %|| 5 5
% %|| 6 ] 6 ] }
% %|| .fa
% %|| .nvoxels
% %|| [ 3 4 3 3 4 3 ]
% %|| .radius
% %|| [ 1 1 1 1 1 1 ]
% %|| .center_ids
% %|| [ 1 2 3 4 5 6 ]
% %|| .i
% %|| [ 1 2 3 1 2 3 ]
% %|| .j
% %|| [ 1 1 1 2 2 2 ]
% %|| .k
% %|| [ 1 1 1 1 1 1 ]
% %|| .samples
% %|| [ 3.1 3.68 3.56 1.47 2.96 2.27
% %|| 6.06 6.39 3.29 6.54 4.43 4.1
% %|| 5.85 4.2 2.11 6.47 6.18 3.47
% %|| : : : : : :
% %|| 4.62 3.18 0.829 5.53 5.53 3.14
% %|| 3.7 3.08 1.75 4.71 4.95 3.39
% %|| 1.23 0.83 1.48 1.03 1.75 1.31 ]@15x6
% %|| .sa
% %|| .targets1
% %|| [ 2
% %|| 3
% %|| 4
% %|| :
% %|| 5
% %|| 6
% %|| 6 ]@15x1
% %|| .targets2
% %|| [ 1
% %|| 1
% %|| 1
% %|| :
% %|| 4
% %|| 4
% %|| 5 ]@15x1
% %||
%
% % limitation: cannot have repeated targets
% ds=cosmo_synthetic_dataset('nchunks',2,'ntargets',3);
% cosmo_dissimilarity_matrix_measure(ds);
% %|| error('...')
%
% % averaging the samples for each unique target resolves the issue of
% % repeated targets
% ds=cosmo_synthetic_dataset('nchunks',2,'ntargets',3);
% ds_avg=cosmo_fx(ds,@(x)mean(x,1),'targets');
% ds_dsm=cosmo_dissimilarity_matrix_measure(ds_avg);
% cosmo_disp(ds_dsm);
% %|| .sa
% %|| .targets1
% %|| [ 2
% %|| 3
% %|| 3 ]
% %|| .targets2
% %|| [ 1
% %|| 1
% %|| 2 ]
% %|| .a
% %|| .sdim
% %|| .labels
% %|| { 'targets1' 'targets2' }
% %|| .values
% %|| { [ 1 [ 1
% %|| 2 2
% %|| 3 ] 3 ] }
% %|| .samples
% %|| [ 1.68
% %|| 1.71
% %|| 0.711 ]
%
% Notes:
% - it is recommended to set the 'center_data' option to true when using
% the default 'correlation' metric, as this removes a main effect
% common to all samples; note that this option is disabled by default
% for historical reasons.
% - [cosmo_]pdist defaults to 'euclidean' distance, but correlation
% distance is preferable for neural dissimilarity matrices, hence it
% is used as the default here
% - Results from this function, when used with the default 'correlation'
% metric, should *not* be Fisher transformed (using atanh) because
% the output ranges from 0 to 2 (=one minus Pearson correlation)
% and the Fisher transform of a value >1 is complex (non-real). This is
% generally a Bad Thing (see the illustration below).
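%
% % As a quick illustration of the previous note (the output values
% % shown are approximate, using the builtin atanh):
% atanh(0.5) % 0.5493, real
% atanh(1.5) % 0.8047 + 1.5708i, complex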
%
% See also: cosmo_pdist, pdist
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #
% check input
check_input(ds);
args=get_args(varargin);
% make new dataset
ds_dsm=struct();
% if center_data, then subtract the mean first
samples=ds.samples;
if args.center_data
samples=bsxfun(@minus,samples,mean(samples,1));
end
% compute pair-wise distances between all samples using cosmo_pdist,
% then store them as samples in ds_dsm
ds_dsm.samples=cosmo_pdist(samples, args.metric)';
% set the sample attributes (.sa.targets1 and .sa.targets2) and the
% sample dimension attributes (.a.sdim) based on the targets
attr=get_sample_attributes(ds.sa.targets);
ds_dsm.sa=attr.sa;
ds_dsm.a=attr.a;
function check_input(ds)
if ~(isstruct(ds) && ...
isfield(ds,'samples') && ...
isfield(ds,'sa') && ...
isfield(ds.sa,'targets'))
error(['require dataset structure with fields '...
'.samples and .sa.targets']);
end
function args=get_args(varargin)
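% the parsed options are cached in persistent variables: a searchlight
% calls this measure many times with identical options, so caching
% avoids re-parsing them on every call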
persistent cached_varargin;
persistent cached_args;
if ~isequal(varargin, cached_varargin)
cached_args=cosmo_structjoin('metric','correlation',...
'center_data',false,...
varargin);
cached_varargin=varargin;
end
args=cached_args;
function ds_skeleton=get_sample_attributes(targets)
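% the result is cached in persistent variables: when the targets are the
% same as in the previous call (as is typically the case in a
% searchlight), the sample attributes are not recomputed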
persistent cached_targets;
persistent cached_ds_skeleton;
if ~isequal(targets, cached_targets)
ntargets=numel(targets);
% unique targets
classes=unique(targets);
nclasses=numel(classes);
% each should occur exactly once
if nclasses~=ntargets
error(['.sa.targets should be a permutation of unique targets; '...
'to average samples with the same targets, consider '...
'ds_mean=cosmo_fx(ds,@(x)mean(x,1),''targets'')']);
end
% store the sample attributes: the pairs of sample indices (rows and
% columns in the lower triangle of the square-form distance matrix)
% used to compute the dsm
[i,j]=find(triu(repmat(1:nclasses,nclasses,1),1)');
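% (the transposed strictly-upper-triangular mask selects the strictly
% lower triangle; find then returns its nonzero positions column by
% column. For example, with nclasses=3 this gives i=[2;3;3] and
% j=[1;1;2], matching the pair order of distances from [cosmo_]pdist)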
cached_ds_skeleton.sa=struct();
cached_ds_skeleton.sa.targets1=i;
cached_ds_skeleton.sa.targets2=j;
% set sample dimensions
add_labels={'targets1','targets2'};
add_values={targets, targets};
cached_ds_skeleton.a.sdim=struct();
cached_ds_skeleton.a.sdim.labels=add_labels;
cached_ds_skeleton.a.sdim.values=add_values;
cached_targets=targets;
end
ds_skeleton=cached_ds_skeleton;