cosmo correlation measure hdrΒΆ

function ds_sa=cosmo_correlation_measure(ds, varargin)
% Computes a split-half correlation measure
%
% d=cosmo_correlation_measure(ds[, args])
%
% Inputs:
%  ds             dataset structure with fields .samples, .sa.targets and
%                 .sa.chunks
%  args           optional struct with the following optional fields:
%    .partitions  struct with fields .train_indices and .test_indices.
%                 Both should be a Px1 cell for P partitions. If omitted,
%                 it is set to cosmo_nchoosek_partitioner(ds,'half').
%    .template    QxQ matrix for Q classes in each chunk. This matrix
%                 weights the correlations across the two halves.
%                 If ds.sa.targets has only one unique value, it must be
%                 set to the scalar value 1; otherwise it should
%                 have a mean of zero. If omitted, it has positive values
%                 of (1/Q) on the diagonal and (-1/(Q*(Q-1)) off the
%                 diagonal.
%                 (Note: this can be used to test for representational
%                 similarity matching)
%    .merge_func  A function handle used to merge data from matching
%                 targets in the same chunk. Default is @(x) mean(x,1),
%                 meaning that values are averaged over the same samples.
%                 It is assumed that isequal(args.merge_func(y),y) if y
%                 is a row vector.
%    .corr_type   Type of correlation: 'Pearson','Spearman','Kendall'.
%                 The default is 'Pearson'.
%    .post_corr_func  Operation performed after correlation. (default:
%                     @atanh)
%    .output      'mean' (default): correlations weighted by template
%                 'raw' or 'correlation': correlations between all classes
%                 'one_minus_correlation': 1 minus correlations
%                 'mean_by_fold': provide weighted correlations for each
%                                 fold in the partitions.
%
%
% Output:
%    ds_sa        Struct with fields:
%      .samples   Scalar indicating how well the template matrix
%                 correlates with the correlation matrix from the two
%                 halves (averaged over partitions). By default:
%                 - this value is based on Fisher-transformed correlation
%                   values, not raw correlation values
%                 - this is the average of the (Fisher-transformed)
%                   on-diagonal minus the average of the
%                   (Fisher-transformed) off-diagonal elements of the
%                   correlation matrix based on the two halves of the data.
%      .sa        Struct with field:
%        .labels  if output=='corr'
%        .half1   } if output=='raw': (N^2)x1 vectors with indices of data
%        .half2   } from two halves, with N the number of unique targets.
%
% Example:
%     ds=cosmo_synthetic_dataset();
%     %
%     % compute on-minus-off diagonal correlations
%     c=cosmo_correlation_measure(ds);
%     cosmo_disp(c)
%     %|| .samples
%     %||   [ 1.23 ]
%     %|| .sa
%     %||   .labels
%     %||   { 'corr' }
%     %
%     % Spearman correlation requires the Matlab statistics toolbox
%     cosmo_skip_test_if_no_external('@stats');
%     %
%     c=cosmo_correlation_measure(ds,'corr_type','Spearman');
%     cosmo_disp(c)
%     %|| .samples
%     %||   [ 1.28 ]
%     %|| .sa
%     %||   .labels
%     %||   { 'corr' }
%
%     ds=cosmo_synthetic_dataset();
%     % get raw correlations without fisher transformation
%     c_raw=cosmo_correlation_measure(ds,'output','correlation',...
%                                       'post_corr_func',[]);
%     cosmo_disp(c_raw)
%     %|| .samples
%     %||   [  0.363
%     %||     -0.404
%     %||     -0.447
%     %||      0.606 ]
%     %|| .sa
%     %||   .half1
%     %||     [ 1
%     %||       2
%     %||       1
%     %||       2 ]
%     %||   .half2
%     %||     [ 1
%     %||       1
%     %||       2
%     %||       2 ]
%     %|| .a
%     %||   .sdim
%     %||     .labels
%     %||       { 'half1'  'half2' }
%     %||     .values
%     %||       { [ 1    [ 1
%     %||           2 ]    2 ] }
%     %
%     % convert to matrix form (N x N x P, with N the number of classes and
%     % P=1)
%     matrices=cosmo_unflatten(c_raw,1);
%     cosmo_disp(matrices)
%     %|| [  0.363    -0.447
%     %||   -0.404     0.606 ]
%
%     % compute for each fold separately, using a custom take-one-chunk
%     % out partitioning scheme and without Fisher transformation of
%     % the correlations.
%     % Note that c.sa.chunks in the output reflects the test chunk
%     % in each partition
%     ds=cosmo_synthetic_dataset('type','fmri','nchunks',4);
%     partitions=cosmo_nfold_partitioner(ds);
%     c=cosmo_correlation_measure(ds,'output','mean_by_fold',...
%                                    'partitions',partitions,...
%                                    'post_corr_func',[]);
%     cosmo_disp(c.samples);
%     %|| [  1.32
%     %||   0.512
%     %||    1.05
%     %||    1.23 ]
%     cosmo_disp(c.sa);
%     %|| .partition
%     %||   [ 1
%     %||     2
%     %||     3
%     %||     4 ]
%
%     % minimal searchlight example
%     ds=cosmo_synthetic_dataset('type','fmri');
%     % use searchlight with radius 1 voxel (radius=3 is more typical)
%     nbrhood=cosmo_spherical_neighborhood(ds,'radius',1,'progress',false);
%     % run searchlight
%     res=cosmo_searchlight(ds,nbrhood,@cosmo_correlation_measure,...
%                               'progress',false);
%     cosmo_disp(res.samples)
%     %|| [ 1.87      1.25      1.51      1.68      1.71     0.879 ]
%     cosmo_disp(res.sa)
%     %|| .labels
%     %||   { 'corr' }
%
% Notes:
%   - by default the post_corr_func is set to @atanh. This is equivalent to
%     a Fisher transformation from r (correlation) to z (standard z-score).
%     The underlying math is z=atanh(r)=.5*log((1+r)./log(1-r)).
%     The rationale is to make data more normally distributed under the
%     null hypothesis.
%     Fisher-transformed correlations can be transformed back to
%     their original correlation values using 'tanh', which is the inverse
%     of 'atanh'.
%   - To disable the (by default used) Fisher-transformation, set the
%     'post_corr_func' option to [].
%   - if multiple samples are present with the same chunk and target, they
%     are averaged *prior* to computing the correlations.
%   - if multiple partitions are present, then the correlations are
%     computed separately for each partition, and then averaged (unless
%     the 'output' option is set, and set to a different value than
%     'mean'.
%   - When more than two chunks are present in the input, partitions
%     consist of all possible half splits for which the number of unique
%     chunks in the train and test set differ by 1 at most.
%     For illustration, up to 6 chunks, the
%     partitions are:
%       - 2 chunks   -  partition #1
%         chunks first  half: {    1
%         chunks second half:  {   2
%
%       - 3 chunks   -  partition #1  #2  #3
%         chunks first  half: {    3   2   1
%                              {   1   1   2
%         chunks second half:  {   2   3   3
%
%       - 4 chunks   -  partition #1  #2  #3
%                             {    3   2   2
%         chunks first  half: {    4   4   3
%                              {   1   1   1
%         chunks second half:  {   2   3   4
%
%       - 5 chunks   -  partition #1  #2  #3  #4  #5  #6  #7  #8  #9  #10
%                             {    4   3   3   2   2   2   1   1   1   1
%         chunks first  half: {    5   5   4   5   4   3   5   4   3   2
%                              {   1   1   1   1   1   1   2   2   2   3
%         chunks second half:  {   2   2   2   3   3   4   3   3   4   4
%                              {   3   4   5   4   5   5   4   5   5   5
%
%       - 6 chunks   -  partition #1  #2  #3  #4  #5  #6  #7  #8  #9  #10
%                             {    4   3   3   3   2   2   2   2   2   2
%         chunks first  half: {    5   5   4   4   5   4   4   3   3   3
%                             {    6   6   6   5   6   6   5   6   5   4
%                              {   1   1   1   1   1   1   1   1   1   1
%         chunks second half:  {   2   2   2   2   3   3   3   4   4   5
%                              {   3   4   5   6   4   5   6   5   6   6
%     Thus, with an increasing number of chunks, the number of partitions
%     (and thus the time required to run this function) increases
%     quadratically. To use simpler partition schemes (e.g. odd-even, as
%     provided by cosmo_oddeven_partitioner), specify the 'partitions'
%     argument.
%
% References
%   - Haxby, J. V. et al (2001). Distributed and overlapping
%     representations of faces and objects in ventral temporal cortex.
%     Science 293, 2425?2430
%
% #   For CoSMoMVPA's copyright information and license terms,   #
% #   see the COPYING file distributed with CoSMoMVPA.           #