Commit ad834de2 authored by [ofaull]'s avatar [ofaull]
Browse files

Added R and p correlation values to validation Figures

parents
.DS_Store
cluster_useful_lines.txt
fix/
pbihb_unused/
[submodule "PhysIO"]
path = PhysIO
url = git@tnurepository.ethz.ch:ofaull/physio-public.git
[submodule "SPM12"]
path = SPM12
url = https://github.com/spm/spm12.git
[submodule "TAPAS"]
path = TAPAS
url = https://github.com/translationalneuromodeling/tapas.git
[submodule "pbihb_prepModels"]
path = pbihb_prepModels
url = git@tnurepository.ethz.ch:ofaull/pbihb_prepmodels.git
[submodule "HMeta-d"]
path = HMeta-d
url = https://github.com/metacoglab/HMeta-d.git
Subproject commit 83601d3a7765b65e46bb2fed4b9152a3a31963b7
Download source files from
<https://sourceforge.net/projects/mcmc-jags/files/JAGS/3.x/Source/>
```shell
tar -xzvf JAGS-3.4.0.tar.gz
cd JAGS-3.4.0/
# Set up compiler
module load gcc openblas
module list
# Build and install!
mkdir "${HOME}/jags"
LIBS=-lopenblas ./configure --prefix="${HOME}/jags"
make
make install
```
# Open matjags.m (in HMeta-d toolbox) and re-set the location of jags to where you have downloaded it on Euler,
# by changing the 2 instances of the following:
# original line: jagsPrefix = ''; %sprintf('/usr/local/bin/');
# example change to: jagsPrefix = sprintf('/cluster/home/ofaull/jags/bin/');
function [samples, stats, structArray] = matjags(dataStruct, jagsFilenm, initStructs , varargin)
% MATJAGS, a Matlab interface for JAGS
% Version 1.3.2. Tested on JAGS 3.3.0, Windows 64-bit version
%
% This code has been adapted from MATBUGS that was written by Kevin Murphy and Maryam Mahdaviani
%
% [samples, stats] = matjags(dataStruct, bugsModelFileName, initStructs, ...)
%
% INPUT:
% dataStruct contains values of observed variables.
% jagsFilenm is the name of the model file
% initStructs contains initial values for the latent variables (unlike
% matbugs, this is a required variable)
% Note: variables with names 'a.b' in the bugs model file
% should be called 'a_b' in the matlab data structure.
%
% Optional arguments passed as 'string', value pairs [default in brackets, case
% insensitive]:
% 'monitorParams' - cell array of field names (use 'a_b' instead of 'a.b')
% [defaults to *, which currently does nothing...]
% 'nChains' - number of chains [3]
% 'nBurnin' - num samples for burn-in per chain [1000]
% 'nSamples' - num samples to keep after burn-in [5000]
% 'thin' - keep every n'th step [1]
% 'dic' - [1] read out DIC values
% 'workingDir' - directory to store temporary data/init/coda files [pwd/tmp]. Note that total number of iterations = nBurnin + nSamples * thin.
% 'savejagsoutput' - 0/1 = do/do not produce text files with output from JAGS
% 'verbosity' -
% 0 = no messages during runtime;
% 1 = minimum number of messages (e.g. which chain is being executed);
% 2 = maximum number of messages
% 'cleanup' - 0/1 -- do we want to remove PREVIOUS temporary files?
% 'dotranspose' - Set to 0 (default) if you want to insure compatibility with matbugs/WinBUGS
%
% OUTPUT
% S contains the samples; each field may have a different shape:
% S.theta(c, s) is the value of theta in sample s, chain c
% (scalar variable)
% S.theta(c, s, i) is the value of theta(i) in sample s, chain c
% (vector variable)
% S.theta(c, s, i, j) is the value of theta(i,j) in sample s, chain c
% (matrix variable)
%
% stats contains various statistics, currently:
% stats.mean, stats.std and stats.Rhat, stats.DIC.
% Each field may have a different shape:
% stats.mean.theta
% stats.mean.theta(i)
% stats.mean.theta(i,j)
%
% Rhat is the "estimated potential scale reduction" statistic due to
% Gelman and Rubin.
% Rhat values less than 1.1 mean the chain has probably converged for
% this variable.
%
% Example
%
% [samples, stats ] = matjags( ...
% datastruct, ...
% fullfile(pwd, 'Gaussian_3.txt'), ...
% init0, ...
% 'doparallel' , 0, ...
% 'nchains', nchains,...
% 'nburnin', nburnin,...
% 'nsamples', nsamples, ...
% 'thin', 1, ...
% 'dic' , 1,...
% 'monitorparams', {'mu','sigma'}, ...
% 'savejagsoutput' , 1 , ...
% 'verbosity' , 1 , ...
% 'cleanup' , 0 , ...
% 'showwarnings' , 1 , ...
% 'workingdir' , 'tmpjags' );
%
% For Windows users:
% The JAGS executable should be placed in the windows path
% In Windows 7, go to Control Panel, System and Security, System
% and click on "Advanced System Settings" followed by "Environment Variables"
% Under System variables, click on Path, and add the jags path to the string
% This could look something like "C:\Program Files\JAGS\JAGS-4.3.0\x64\bin"
%
% For MAC users:
% Mike Kalish provided some suggestions to make matjags work on a Mac. These were implemented
% in the current version but this code is not thouroughly tested yet. Please send me any changes in
% the code needed to make matjags run on a Mac/Linux/Unix system.
%
% Written by Mark Steyvers (mark.steyvers@uci.edu) based on the code
% MATBUGS that was written by Maryam Mahdaviani (maryam@cs.ubc.ca)
% and Kevin Murphy (murphyk@cs.ubc.ca)
% Changes in version 1.3.2:
% * Bug fix. Fixed issue with initialization of parameter values. All values were set to the value for
% chain 1.
% Changes in version 1.3:
% * Bug fix. Added lines marked by "% GP" as suggested by Ganesh Padmanabhan
% Changes in version 1.1:
% * The warnings produces by JAGS are now suppressed by default. This removes
% any message about adaptation being incomplete due to a small number of
% burnin iterations
% * Changed the default working directory for JAGS to make it platform
% independent
if ispc
defaultworkingDir = 'c:\temp\jagstmp'; % change this to fit your particular needs
elseif ismac | isunix
defaultworkingDir = [ '..' filesep 'jagstmp' ]; % change this to fit your particular needs
end
% Get the parameters
[ nChains, workingDir, nBurnin, nSamples, ...
monitorParams, thin, dodic, doParallel, savejagsoutput, verbosity, cleanup, showwarnings,dotranspose,doboot ] = ...
process_options(...
varargin, ...
'nChains', 1, ...
'workingDir', defaultworkingDir, ...
'nBurnin', 1000, ...
'nSamples', 5000, ...
'monitorParams', {}, ...
'thin', 1, ...
'dic' , 1, ...
'doParallel' , 0, ...
'savejagsoutput' , 1, ...
'verbosity' , 0,...
'cleanup' , 0, ...
'showwarnings' , 0 , ...
'dotranspose' , 0 );
if length( initStructs ) ~= nChains
error( 'Number of structures with initial values should match number of chains' );
end
[ whdir , jagsModelBase , modelextension ] = fileparts( jagsFilenm );
jagsModel = [ jagsModelBase modelextension ];
% get the current directory
curdir = pwd;
cd( whdir );
%[status,result] = dos( 'dir' )
% Does the temporary directory exist? If not, create it
if ~exist( workingDir , 'dir' )
[SUCCESS,MESSAGE,MESSAGEID] = mkdir(workingDir);
if SUCCESS == 0
error( MESSAGE );
end
end
cd( workingDir );
% Do we want to cleanup files before we start?
if cleanup==1
delete( 'CODA*' );
delete( 'jag*' );
end
% Create the data file
jagsData = 'jagsdata.R';
dataGenjags(dataStruct, jagsData , '', dotranspose );
nmonitor = length( monitorParams );
if nmonitor == 0
error( 'Please specify at least one node name to monitor' );
end
% Develop a separate JAGS script for each chain
for whchain=1:nChains
jagsScript = sprintf( 'jagscript%d.cmd' , whchain );
codastem = sprintf( 'CODA%d' , whchain );
InitData = sprintf( 'jagsinit%d.R' , whchain );
% Create the jags script for this chain
[ fid , message ] = fopen( jagsScript , 'wt' );
if fid == -1
error( message );
end
fprintf( fid , 'load wiener\n' ); % Adding the Wiener Diffusion model
fprintf( fid , 'load glm\n' );
%fprintf( fid , 'model in "..%s%s"\n' , filesep , jagsModel' );
if dodic
fprintf( fid , 'load dic\n' );
end
if ~isempty(whdir) && (strcmp(whdir(1),filesep) || (length(whdir) > 2 && whdir(2) == ':'))
% Case when a full path string is specified for the jagsModel
fprintf( fid , 'model in "%s"\n' , fullfile(whdir , jagsModel));
else
% Case when a relative path string is specified for the jagsModel
fprintf( fid , 'model in "%s"\n' , fullfile(curdir , whdir, jagsModel));
end
fprintf( fid , 'data in %s\n' , jagsData' );
fprintf( fid , 'compile, nchains(1)\n' );
fprintf( fid , 'parameters in %s\n' , InitData );
fprintf( fid , 'initialize\n' );
fprintf( fid , 'update %d\n' , nBurnin );
for j=1:nmonitor
fprintf( fid , 'monitor set %s, thin(%d)\n' , monitorParams{ j } , thin );
end
if dodic
%fprintf( fid , 'monitor deviance, thin(%d)\n' , thin );
fprintf( fid , 'monitor deviance\n' );
%fprintf( fid , 'monitor pD\n' );
end
fprintf( fid , 'update %d\n' , nSamples * thin );
fprintf( fid , 'coda *, stem(''%s'')\n' , codastem );
fclose( fid );
% Create the init file
addlines = { '".RNG.name" <- "base::Mersenne-Twister"' , ...
sprintf( '".RNG.seed" <- %d' , whchain ) };
dataGenjags( initStructs( whchain ), InitData , addlines, dotranspose );
end
% Do we use the Matlab parallel computing toolbox?
if doParallel==1
%numworkers = matlabpool('size');
%if (numworkers == 0)
% error( 'Matlab pool of workers not initialized. Use command "matlabpool open 7" for example to open up a pool of 7 workers' );
%end
status = cell( 1,nChains );
result = cell( 1,nChains );
if ispc
parfor whchain=1:nChains
jagsScript = sprintf( 'jagscript%d.cmd' , whchain );
cmd = sprintf( 'jags %s' , jagsScript );
if verbosity > 0
fprintf( 'Running chain %d (parallel execution)\n' , whchain );
end
[status{ whchain },result{whchain}] = dos( cmd );
end
elseif ismac | isunix
parfor whchain=1:nChains
jagsScript = sprintf( 'jagscript%d.cmd' , whchain );
%jagsPrefix = ''; %sprintf('/usr/local/bin/');
jagsPrefix = sprintf('/cluster/home/ofaull/jags/bin/');
cmd = sprintf( '%sjags %s' ,jagsPrefix, jagsScript );
if verbosity > 0
fprintf( 'Running chain %d (parallel execution)\n' , whchain );
end
[status{ whchain },result{whchain}] = dos( cmd );
end
end
else % Run each chain serially
status = cell( 1,nChains );
result = cell( 1,nChains );
for whchain=1:nChains
jagsScript = sprintf( 'jagscript%d.cmd' , whchain );
if ispc
cmd = sprintf( 'jags %s' , jagsScript );
elseif ismac | isunix
%jagsPrefix = ''; %sprintf('/usr/local/bin/');
jagsPrefix = sprintf('/cluster/home/ofaull/jags/bin/');
cmd = sprintf( '%sjags %s' ,jagsPrefix, jagsScript );
end
if verbosity > 0
fprintf( 'Running chain %d (serial execution)\n' , whchain );
end
[status{ whchain },result{whchain}] = dos( cmd );
end
end
% Save the output from JAGS to a text file?
if savejagsoutput==1
for whchain=1:nChains
filenm = sprintf( 'jagoutput%d.txt' , whchain );
[ fid , message ] = fopen( filenm , 'wt' );
if fid == -1
error( message );
end
resultnow = result{whchain};
fprintf( fid , '%s' , resultnow );
fclose( fid );
end
end
%% Do some error checking.
% For each chain, check if the output contains some error or warning message.
for whchain=1:nChains
resultnow = result{whchain};
statusnow = status{ whchain };
if status{whchain} > 0
cd( curdir );
error( [ 'Error from dos environment: ' resultnow ] );
end
% Do we get an error message anywhere from JAGS --> produce an error
pattern = [ 'can''t|RUNTIME ERROR|syntax error|failed' ];
errstr = regexpi( resultnow , pattern , 'match' );
if ~isempty( errstr )
cd( curdir );
fprintf( 'Error encountered in jags (chain %d). Check output from JAGS below:\n' , whchain );
fprintf( 'JAGS output for chain %d\n%s\n' , whchain , resultnow );
error( 'Stopping execution because of jags error' );
end
% Do we get a warning message anywhere from JAGS --> produce a matlab warning
if showwarnings ~= 0
pattern = [ 'WARNING' ];
errstr = regexpi( resultnow , pattern , 'match' );
if ~isempty( errstr )
warning( 'JAGS produced a warning message. Check the output below produced by the JAGS run' );
fprintf( 'JAGS output for chain %d\n%s\n' , whchain , resultnow );
end
end
if verbosity == 2
fprintf( 'JAGS output for chain %d\n%s\n' , whchain , resultnow );
end
% NOTE: if the error is "jags is not recognized as an internal or external
% command, then the jags bin folder is not on the windows path"
end
%% Extract information from the output files so we can pass it back to Matlab
codaIndex = 'CODA1index.txt'; % the index files are identical across chains, just pick first one
for i=1:nChains
codaF = [ 'CODA' , num2str(i) , 'chain1.txt' ];
S = bugs2mat(codaIndex, codaF);
structArray(i) = S;
end
samples = structsToArrays(structArray);
stats = computeStats(samples,doboot);
cd( curdir );
%% DIC calculation
if dodic
dbar = mean( samples.deviance(:));
dhat = min( samples.deviance(:));
pd = dbar - dhat;
stats.dic = pd + dbar;
end
end
%% ----- nested functions -----------------
function dataGenjags(dataStruct, fileName, addlines, dotranspose )
% This is a helper function to generate data or init files for JAGS
% Inputs:
% fileName: name of the text file containing initial values. for each
% chain we'll fileName_i where 'i' is the chain number,
% dataStruct: is a Struct with name of params(consistant in the same
% order with paramList) are fields and intial values are functions
if nargin<3
error(['This function needs three arguments']);
end
fieldNames = fieldnames(dataStruct);
Nparam = size(fieldNames, 1);
%fileName = [fileName, '.txt'];
fid = fopen(fileName, 'w');
if fid == -1
error(['Cannot open ', fileName ]);
end
%fprintf(fid,'list(');
for i=1:Nparam
fn = fieldNames(i);
fval = fn{1};
val = getfield(dataStruct, fval);
[sfield1, sfield2]= size(val);
msfield = max(sfield1, sfield2);
newfval = strrep(fval, '_', '.');
newfval = [ '"' newfval '"' ];
if ((sfield1 == 1) && (sfield2 == 1)) % if the field is a singleton
fprintf(fid, '%s <-\n%G',newfval, val);
%
% One-D array:
% beta = c(6, 6, ...)
%
% 2-D or more:
% Y=structure(
% .Data = c(1, 2, ...), .Dim = c(30,5))
%
elseif ((length(size(val)) == 2) && ((sfield1 == 1) || (sfield2 == 1)))
fprintf(fid, '%s <-\nc(',newfval);
for j=1:msfield
if (isnan(val(j)))
fprintf(fid,'NA');
else
% format for winbugs
fprintf(fid,wb_strval(val(j)));
end
if (j<msfield)
fprintf(fid, ', ');
else
fprintf(fid, ')');
end
end
else
% non-trivial 2-D or more array
valsize = size(val);
alldatalen = prod(valsize);
%alldata = reshape(val', [1, alldatalen]);
%alldata = alldata(:)';
%Truccolo-Filho, Wilson <Wilson_Truccolo@brown.edu>
if length(valsize)<3
if dotranspose==0
alldata = reshape(val, [1, alldatalen]);
else
alldata = reshape(val', [1, alldatalen]);
valsize = size( val' );
end
elseif length(valsize)==3
clear valTransp
if dotranspose==1
for j=1:valsize(3)
valTransp(j,:,:)=val(:,:,j)';%need a new variable, since val might be rectangular
end
alldata=valTransp(:)';
else % GP
alldata = reshape(val, [1, alldatalen]); % GP
end
else
['Error: 4D and higher dimensional arrays not accepted']
return
end
%fprintf(fid, '%s <-\nstructure(.Data=c(', newfval);
fprintf(fid, '%s <-\nstructure(c(', newfval);
for j=1:alldatalen
if (isnan(alldata(j)))
fprintf(fid,'NA');
else
% format for winbugs
fprintf(fid,wb_strval(alldata(j)));
end
if (j < alldatalen)
fprintf(fid,',');
else
fprintf(fid,'), .Dim=c(', alldata(j));
end
end
for j=1:length(valsize)
if (j < length(valsize))
fprintf(fid, '%G,', valsize(j));
else
fprintf(fid, '%G))', valsize(j));
end
end
end
if (i<Nparam)
%fprintf(fid, ', ');
fprintf(fid, '\n');
else
%fprintf(fid, ')\n');
fprintf(fid, '\n');
end
end
if length( addlines ) > 0
nextra = length( addlines );
for j=1:nextra
fprintf( fid , '%s\n' , addlines{ j } );
end
end
fclose(fid);
%%%%%%%%
end
function s = wb_strval(v)
% Converts numeric value to a string that is acceptable by winbugs.
% This is most problematic for exponent values which must have at least 1
% decimal and two digits for the exponent. So 1.0E+01 rather than 1E+001
% Note that only Matlab on PC does 3 digits for exponent.
s = sprintf('%G', v);
if strfind(s, 'E')
if length(strfind(s, '.')) == 0
s = strrep(s, 'E', '.0E');
end
s = strrep(s, 'E+0', 'E+');
s = strrep(s, 'E-0', 'E-');
end
%%%%%%%%
end
function f = fullfileKPM(varargin)
% fullfileKPM Concatenate strings with file separator, then convert it to a/b/c
% function f = fullfileKPM(varargin)
f = fullfile(varargin{:});
f = strrep(f, '\', '/');
%%%%%%%%
end
function A = structsToArrays(S)
% Suppose S is this struct array
%
% S(c).X1(s)
% S(c).X2(s,i)
% S(c).X3(s,i,j)
%
% where s=1:N in all cases
%
% Then we return
% A.X1(c,s)
% A.X2(c,s,i)
% A.X3(c,s,i,j)
C = length(S);
fld = fieldnames(S);
A = [];
for fi=1:length(fld)
fname = fld{fi};
tmp = getfield(S(1), fname);
sz = size(tmp);
psz = prod(sz);
data = zeros(C, psz);
for c=1:C
tmp = getfield(S(c), fname);
%data = cat(1, data, tmp);
data(c,:) = tmp(:)';
end
if sz(2) > 1 % vector or matrix variable
data = reshape(data, [C sz]);
end
A = setfield(A, fname, data);
end
end
%%%%%%%%%%%%
function [Rhat, m, s] = EPSR(samples)
%
% function [R, m, s] = EPSR(samples)
% "estimated potential scale reduction" statistics due to Gelman and Rubin.
% samples(i,j) for sample i, chain j
%
% R = measure of scale reduction - value below 1.1 means converged:
% see Gelman p297
% m = mean(samples)
% s = std(samples)
% This is the same as the netlab function convcalc(samples')
[n m] = size(samples);
meanPerChain = mean(samples,1); % each column of samples is a chain
meanOverall = mean(meanPerChain);
% Rhat only works if more than one chain is specified.
if m > 1
% between sequence variace
B = (n/(m-1))*sum( (meanPerChain-meanOverall).^2);