To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit e5cf5dd0 authored by StefanThoma's avatar StefanThoma
Browse files

heterogen

Implemented data structure for study alb5 to work with some utilities function from heterogen_fn script.
parent de3cca7a
No preview for this file type
No preview for this file type
No preview for this file type
......@@ -6,61 +6,106 @@ output: html_document
---
```{r setup, include=FALSE}
rm(list=ls())
knitr::opts_chunk$set(echo = TRUE)
```
# Setup
```{r}
pacman::p_load(tidyverse, lme4, readxl, dtplyr, compute.es, relevance)
library(data.table)
pacman::p_load(tidyverse, lme4, readxl, dplyr, compute.es, relevance, lattice, rockchalk)
set.seed(12932406)
```
## Global parameters
```{r}
# Set some parameters
digitsForRounding <- 3
variables_used_in_manyLabs <- list(#sunk = c("sunkDV", "sunkgroup", "SMD"),
#gain_loss = c("gainlossDV", "gainlossgroup", "OR"),
#anch_ny_sf = c("Ranch1", "anch1group","SMD"),
#anch_pop_chic = c( "Ranch2", "anch2group","SMD"),
#anch_everest = c( "Ranch3", "anch3group","SMD"),
#anch_babies = c( "Ranch4", "anch4group","SMD"),
#gambfal = c("gambfalDV", "gambfalgroup","SMD"),
#scales = c("scales", "scalesgroup", "OR"),
#reciprocity = c("reciprocityus", "reciprocitygroup", "OR"),
#allowed_forbidden = c("allowedforbidden", "allowedforbiddenGroup", "OR"),
#quote = c("quote", "quoteGroup","SMD"),
#flag = c("flagdv", "flagGroup","SMD"),
#money = c("Sysjust", "MoneyGroup","SMD"),
#imag_cont = c("Imagineddv", "ContactGroup","SMD"),
alb5 = c("SATTotal", "Condition","SMD")
)
```
# Trial for Alb 5 Revised Protocol
[@albarracin2008]
## Load data.
```{r}
# dtplyr::lazy_dt() use this function in case it doesnt work later on...
# I really only need the first row.
dtml5 <- read_excel("Data_alb/ML5 Alb 5 Revised Protocol.xlsx")
dtmturk <- read_excel("Data_alb/ML5 Alb 5 RPP MTurk Protocol.xlsx")
dtvalidate <- read_excel("Data_alb/Validation Study 10 9 2019.xlsx")
dtaffect <- read_excel("Data_alb/ML5 Alb 5 Revised Protocol subset of locations with happy sad.xlsx")
dtaffect_mturk <- read_excel("Data_alb/ML5 Alb 5 RPP MTurk Protocol with happiness and anger.xlsx")
#dtmturk <- read_excel("Data_alb/ML5 Alb 5 RPP MTurk Protocol.xlsx")
#dtvalidate <- read_excel("Data_alb/Validation Study 10 9 2019.xlsx")
#dtaffect <- read_excel("Data_alb/ML5 Alb 5 Revised Protocol subset of locations with happy sad.xlsx")
#dtaffect_mturk <- read_excel("Data_alb/ML5 Alb 5 RPP MTurk Protocol with happiness and anger.xlsx")
```
## Clean Data
```{r}
source("Utilities/gettingStarted-fn.R")
alb5 <- list("alb5" = dtml5)
alb5 <- get_data_ready(alb5)
```
## Original study
```{r}
# Albarracin 2008 study 5 (total N = 36)
# Save information in a list
orig <- list(m.1 = 12.83, m.2 = 10.78, sd.1 = 1.86, sd.2 = 3.15, n.1 = 18, n.2 = 18)
alb5$orig <- list(m.1 = 12.83, m.2 = 10.78, sd.1 = 1.86, sd.2 = 3.15, n.1 = 18, n.2 = 18)
# Get effect measures
do.call(mes, orig)
do.call(mes, alb5$orig)
```
## Look at data
```{r}
head(dtml5)
dtml5 %>% group_by(Location) %>%
head(alb5$alb5)
alb5$alb5 %>% group_by(Location) %>%
summarise("N" = length(unique(ResponseId)),
"SAT_mean" = mean(SATTotal),
"SAT_sd" = sd(SATTotal))
"SAT Total mean" = mean(SATTotal),
"SAT sd" = sd(SATTotal)) %>%
ungroup()
```
# Fit the model
## Fit the model
```{r}
source("Utilities/heterogen-fn.R")
alb5$heterogen$mm <- MixedModels(variables = c("SATTotal", "Condition"), dat = alb5$alb5, methodFamily = "gaussian")
alb5$heterogen$lrt <- LRTfunction(alb5$heterogen$mm)
```
The output: "random intercept only is the preferred model. " means that for this data, the preferred model includes a random intercept, but no random slope.
This means that while the average number of solved items varied between the locations, the priming effect did not.
## Residuals
```{r}
ml5_m0 <- lmer(SATTotal ~ 1 + (1 | Location), data = dtml5)
# add effect of condition
ml5_m1 <- lmer(SATTotal ~ 1 + Condition + (1 | Location), data = dtml5)
res <- anova(ml5_m0, ml5_m1) # compare models
relevance::termtable()
confint(ml5_m1)
diagnosticPlot(alb5$heterogen$lrt, percent = 1)
```
The residuals look very good. This is particularly visible on the Normal Q-Q Plot.
I therefore think that a linear mixed effects modeling approach is justified for this data.
An alternative approach would for example a poisson regression.
Because we have no heterogeneity in effect size, we can commence with the traditional approach.
## Plot alb5 study
```{r}
```
This diff is collapsed.
###################################
### Getting started ###
###################################
## This function checks if package is installed. If not it installs it and requires it
##' @param packName A package name
##' @return None
##' @author Federico Rogai and Lorenz Herger
installPack <- function(packName) {
if(!require(packName,character.only=T )){
install.packages(packName)
library(packName,character.only=T )
}
}
##########################################################################
## This function replaces values in a data.frame with new values
## It allows for an overlap between the old and and the new valuses
##' @param table data.frame containing the data
##' @param column name of the column in which values need to be replaced
##' @param old_values old values that need to be replaced by new values
##' @param new_values new values that will replace the old values
##' the ith entry of new:value replaces the ith entry of old_values
##' @return table data.frame where the values have been replaced
##' @author Lorenz Herger
##'
replace_values <- function(table, column, old_values, new_values) {
if (length(old_values) != length(new_values)) {
stop("Vectors \"old_values\" and \"new_values\" are not of equal length.")
}
table$helper <- NA
for (i in 1:length(old_values)) {
table[table[[column]] == old_values[i] &!is.na(table[[column]]),]$helper <- new_values[i]
}
table[[column]] <- table$helper
table$helper <- NULL
table
}
##############################################################################
## This function looks up the unique values of a binary column and swaps them
##' @param tb data.frame containing the data
##' @param col name of the column in which values need to be swapped
##' @return tb data.frame where the values have been replaced
##' @author Lorenz Herger
##'
swap_binary_values <- function(tb, col) {
trt <- unique(tb[[col]])
trt <- trt[!is.na(trt)]
if (! length(trt == 2)) stop("Treatment variable is not binary.")
rev_trt <- rev(trt)
tb <- replace_values(tb, col, trt, rev_trt)
}
####################################################################################
## This function takes a list with a data frame and its corresponding study name and makes adjustments necessary for our analysis
##' @param list.dat the list containing the raw data (named after study)
##' @return dat_clean data.frame containing data that is ready for analysis
##' @author Lorenz Herger, adapted by Stefan Thoma
get_data_ready <- function(list.dat) {
# list.dat <- alb5
study <- names(list.dat)
if(length(study) > 1) stop("Wrong input structure")
dat <- list.dat[[study]]
# Remove SPSS-stlye attributes and classes
dat[] <- lapply(dat, as.vector)
dat <- as.data.frame(dat)
# preprocess data
if(study == "alb5"){
dat <- dat %>% select(-starts_with("..."))
}
if(study == "alb5"){
dat <- replace_values(table = dat, column = "Condition", old_values = c("action", "inaction"), new_values = c(1, 0))
}
list.dat[[study]] <- dat
return(list.dat)
}
###################################
### Mixed Effect Models ###
###################################
##' Classical Meta-Analysis procedure
##' @param input_table single element of the list effect_tables (contain effect's estimate and sd associated with it)
##' @return Overall estimated effect and CI for it. Moreover Q-value and associated p-values are reported
##' to assess heterogeneity in the estimated effects
##' @author Federico Rogai
meta_analysis <- function(input_table , level=.95, calculate_OR=FALSE) {
meta_results <- rma(yi, vi, data=input_table, level=level, method="REML")
effectsize <- as.vector(meta_results$beta)
CI <- c(lower_bound = meta_results$ci.lb, upper_bound = meta_results$ci.ub)
q_test <- c(q_statistic = round(meta_results$QE, 2),
p_value = round(meta_results$QEp,5))
lis <- list(effect=effectsize, confint=CI, Q_test = q_test, ratio_vars=meta_results$se.tau2/(meta_results$se))
}
##########################################
# Function which fits two mixed models and a fixed effect model and returns objects containing them.
##' @param variables List of two elements. The first one is the dependent variable, the second one is the treatment variable
##' @param dat dataset
##' @param methodFamily which family does the error distribution belong to? ("gaussian", "binary" or "poisson")
##' @param transformation name of the transformation that should be applied to the response (e.g. log).
##' @return list with three objects of fitted models. The first is the "empty model", i.e. a model without random effects
##' but still with the fixed treatment effect. The second is the model with the random interaction. The third is the model
##' with an interaction between treatment and (random) university effect.
##' @author Federico Rogai
MixedModels<-function(variables, dat, methodFamily, transformation=NULL){
yVar<-variables[[1]]
xVar<-variables[[2]]
if(!is.null(transformation)) {
FUN <- match.fun(transformation)
dat[, yVar]<-FUN(dat[,yVar])
}
if(tolower(methodFamily)!="gaussian"){
fit.empty<-glm(formula=as.formula(paste0(yVar," ~", xVar)), dat, family=methodFamily, na.action = na.omit)
fit0 <- glmer(formula=as.formula(paste0(yVar," ~", xVar, "+ (1|Location)")), dat, family=methodFamily)
fit1 <- glmer(formula=as.formula(paste0(yVar," ~", xVar, "+ (1+", xVar ,"|Location)")), dat, family=methodFamily)
}
else{
fit.empty<-aov(formula=as.formula(paste0(yVar," ~", xVar)), dat, na.action=na.omit)
fit0 <- lmer(formula=as.formula(paste0(yVar," ~", xVar, "+ (1|Location)")), dat)
if(xVar=="sex"){ # We had errors at convergence: this is an ad hoc solution
fit1 <- lmer(formula=as.formula(paste0(yVar," ~", xVar, "+ (1+", xVar ,"|Location)")), dat,
control = lmerControl(optimizer ="Nelder_Mead"))
}
else{ fit1 <- lmer(formula=as.formula(paste0(yVar," ~", xVar, "+ (1+", xVar ,"|Location)")), dat)
}
}
list(fit.empty,fit0, fit1)
}
Example<-FALSE # An example to evaluate if the functions below work
if(Example){
memo.ex<-MixedModels(list("sunkDV", "sunkgroup"),df,"gaussian")
memo.ex1<-MixedModels(list("scales", "scalesgroup"),df,"binomial")
}
##########################################
##' Function computes a likelihood ratio test (LRT) between the model with and without random intercept. If the p-value for this test is
##' smaller than 0.05, we compute the LRT between the model with the random slope and the one without this term (but with the
##' random intercept). If the larger model delivers a superior fit to the data, we print the summary of that model. Otherwise we
##' report the summary of the model we had chosen before.
##' Note: to compute the LRT we need to have the ML estimate. anova() automatically refits the models,
##' whose parameters were estimated using REML
##' @param modelsToCompare Output of function MixedModels, i.e. list with three model fits which are to be compared
##' @return It returns a list with five elements.
##' The first element is the model we have chosen (model with 1-merely fixed effects, 2- random intercept only,
##' 3- with random intercept and random slope)
##' The second element reports the p-value of the hypothesis we have tested in order to choose the model
##' The third element the orginal fit for the "winning model" as from MixedModels
##' The fourth returned argument is the summary of the chosen model
##' The fifth element are the confidence interval of the winning model
##' @author Federico Rogai
LRTfunction<-function(modelsToCompare){
pval0<-anova(modelsToCompare[[3]], modelsToCompare[[1]], test = "LRT")[["Pr(>Chisq)"]][[2]]
pval1<-anova(modelsToCompare[[2]], modelsToCompare[[3]], test = "LRT")[["Pr(>Chisq)"]][[2]]
if(pval0<0.05 & pval1<0.05){
testResult<- cat("Random interaction needed! \n Larger model fits data better than
purely fixed effect \n and random intercept only models. \n
p-values of LRT",round(pval0,digitsForRounding), " \n
respectively",round(pval1,digitsForRounding), ".")
finalModel<-3
}
else{
pval2<-anova(modelsToCompare[[2]], modelsToCompare[[1]], test = "LRT")[["Pr(>Chisq)"]][[2]]
if(pval1>0.05 & pval2<0.05){
testResult<- cat("Random intercept only is the preferred model. \n
p-value of LRT",round(pval1,digitsForRounding), " comparing it to the random slope model \n
respectively",round(pval2,digitsForRounding)," to the fixed effects only model")
finalModel<-2
}
if(pval2>0.05 & pval0>0.05){
testResult<- cat("Pure fixed effect model is the preferred model. \n
p-value of LRT",round(pval0,digitsForRounding), " comparing it to the random slope model \n
respectively",round(pval2,digitsForRounding)," to the random intercept only model")
finalModel<-1
}
else{ cat("WARNING: contraddictory evidence from the LRTs")}
}
winningModel<-summary(modelsToCompare[[finalModel]])
list(finalModel, testResult,modelsToCompare[finalModel], list("Summary of best model", winningModel,confint(modelsToCompare[[finalModel]], method="Wald")))
}
if(Example){
wM<-LRTfunction(memo.ex)
wM1<-LRTfunction(memo.ex1)
}
##########################################
##' This function tests if a model in which the correlation between the two random effects (random intercept and random slope)
##' is removed fits the data as good as one in which this term is present. If it does, it is compared to the random intercept
##' model only.
##' @param ModelCorr it is the output from the function MixedModels.
##' It is passed immediately to LRT since we need the fit of the "winning model".
##' @return Returns an output from anova(model.1, model.2), which tells us
##' a. Is the model without correlation as good as the one in which we allow for it?
##' b. if the answer for a) is "Yes", we use a LRT as implemented in anova() to compare this model to the one
##' with the random Intercept only
##' @author Federico Rogai
TestCorrRE<-function(ModelCorr){
if(LRTfunction(ModelCorr)[[1]]!=3) {"Model does not contain random slope"}
else{
fitCorr<-LRTfunction(ModelCorr)[[3]][[1]] # Extracting the fitted object
treatment<-colnames(fitCorr@frame[2]) # Treatment name
university<-colnames(fitCorr@frame[3]) # University (referrer)
# Updating the model to make the RE uncorrelated
fitUncorr<-update(fitCorr, formula= as.formula(paste0(".~", treatment, "+ (1 + ", treatment, "||", university,")")))
LRTcorr<-anova(fitUncorr, fitCorr)
if(LRTcorr[["Pr(>Chisq)"]][[2]]>0.05){
fitREinterc<-update(fitCorr, formula= as.formula(paste0(".~", treatment, "+ (1 |", university,")")))
return(list("The model without correlation fits the data as well as the one where we allow for it",
"Comparing that model to the Random Intercept only model we obtain the following result",
anova(fitREinterc, fitUncorr)))
}
list("The model without correlation does NOT fit the data as well as the one where we allow for it",
LRTcorr)
}
}
if(Example){
TestCorrRE(memo.ex)
TestCorrRE(memo.ex1)
}
##########################################
##' Moderation analysis for lmer objects. We compute an F-test in anove (SS of type III) and use the p-values
##' computed by the package lmerTest to assess the significance of the interaction terms. Unfortunately such an
##' approach cannot be used for glmer objects (lmerTest does not return p-values for this method).
##' For the sake of consistency we use the function CI_FE_moderation to do that.
##' FUNCTION NOT USED IN THE END
##' @param ModelCorr it is the output from the function LRTfunction (we need the fit of the "winning model")
##' @return Which model is chosen? p-values supporting the decision
##' @author Federico Rogai
LmerMA<-function(fitModer){
library(lmerTest)
F.test<-anova(fitModer)
indexF<-length(F.test[["Pr(>F)"]])
if(F.test[["Pr(>F)"]][(indexF-1)]<0.05 & F.test[["Pr(>F)"]][(indexF)]<0.05 ){
return(list("Both interaction terms are significant at the 0.05 level", F.test[["Pr(>F)"]][(indexF-1):indexF]))
}
if(F.test[["Pr(>F)"]][(indexF-1)]<0.05){
return(list("The interaction term with the US- not US binary variable is significant at the 0.05 level",
F.test[["Pr(>F)"]][(indexF-1)]))
}
if(F.test[["Pr(>F)"]][(indexF)]<0.05 ){
return(list("The interaction term with the Lab- Online binary variable is significant at the 0.05 level",
F.test[["Pr(>F)"]][(indexF)]))
}
else{
list("None of the interaction terms is significant at the 0.05 level", F.test[["Pr(>F)"]])
}
}
##########################################
##' Moderation analysis. We construct the Wlad CI for the fixed effects. If any interaction term's coefficient does
##' cover zero, we keep the term in the model
##' @param ModelCorr it is the output from the function LRTfunction (we need the fit of the "winning model")
##' @return Which model is chosen? p-values supporting the decision
##' @author Federico Rogai
CI_FE_moderation<-function(fitModer){
CI<-confint(fitModer, method="Wald")
indexCI<-nrow(CI)
if((sign(CI[indexCI-1,1])==sign(CI[indexCI-1,2])) & (sign(CI[indexCI,1])==sign(CI[indexCI,2])) ){
return(list("Both interaction terms are significant at the 0.05 level",CI[(indexCI-1):indexCI,1:2]))
}
if((sign(CI[indexCI-1,1])==sign(CI[indexCI-1,2])) ){
return(list("The interaction term with the US- not US binary variable is significant at the 0.05 level",
CI))
}
if( (sign(CI[indexCI,1])==sign(CI[indexCI,2])) ){
return(list("The interaction term with the Lab- Online binary variable is significant at the 0.05 level",
CI))
}
else{
list("None of the interaction terms is significant at the 0.05 level",CI)
}
}
##########################################
##' This function inserts the fixed effects predictors "us_or_international" and "lab_or_online" in the "winning
##' model" from the Likelihood ratio test. These two predictors are separately made interact with the treatment
##' effect to evaluate if they are to be considered moderators.
##' @param ModelCorr it is the output from the function LRTfunction (we need the fit of the "winning model")
##' @return See CI_FE_moderation
##' @author Federico Rogai
moderationAnalysis<-function(ModelCorr){
bestFit<-LRTfunction(ModelCorr)[[3]][[1]] # Extracting the fitted object
treatment<-colnames(bestFit@frame[2]) # Treatment name
fitModer<-update(bestFit, formula= as.formula(paste0(".~. +us_or_international+ lab_or_online+",
treatment, ":us_or_international +",
treatment, ":lab_or_online")))
CI_FE_moderation(fitModer)
}
if(Example){
moderationAnalysis(memo.ex)
moderationAnalysis(memo.ex1)
}
##########################################
##' Interaction plot for one Research Question
##' @param variables one element of the list use_var_list, containing response and explanatory variable
##' @param dat dataset used
##' @return Interaction plot
##' @author Federico Rogai
interactPlot<-function(variables, dat){
y_var<-variables[[1]]
x_var<-variables[[2]]
set.seed(15)
uni_sampled<-sample(unique(dat$sample),length(unique(dat$sample))/2)
df1<-subset(dat, !is.na(get(x_var,dat)) & !is.na(get(y_var,dat)) & (dat$sample %in% uni_sampled))
ggplot(df1, aes(x = get(x_var,df1), y = get(y_var,df1))) +
stat_summary(fun.y = mean, geom = "line", aes(x = get(x_var,df1), y = get(y_var,df1),group = referrer)) +
theme_bw()+
xlab(x_var) +
ylab(y_var)
}
if(FALSE){
interactPlot(use_vars_list[[1]], df)
}
##########################################
##' Interaction plot for one Research Question (within colored lines for moderator)
##' @param variables one element of the list use_var_list, containing response and explanatory variable
##' @param dat dataset used
##' @return Interaction plot
##' @author Federico Rogai
moderationInteractionPlot<-function(variables, dat, moderator){
y_var<-variables[[1]]
x_var<-variables[[2]]
mod<-get(moderator,dat)
moderator_0<-sample(unique(dat$sample[mod==0]),length(unique(dat$sample[mod==0]))/2)
moderator_1<-sample(unique(dat$sample[mod==1]),length(unique(dat$sample[mod==1]))/2)
joint_sample<-c(moderator_0, moderator_1)
df1<-subset(dat, !is.na(get(x_var,dat)) & !is.na(get(y_var,dat)) & (dat$sample %in% joint_sample))
ggplot(df1, aes(x = get(x_var,df1), y = get(y_var,df1), color=factor(get(moderator,df1))))+
stat_summary(fun.y = mean, geom = 'line', aes(x = get(x_var,df1), y = get(y_var,df1), group = Location)) +
theme_bw()+
theme(legend.position="bottom") +
scale_x_discrete( expand = c(0, 0)) +
labs(colour = paste(as.character(moderator)))+
xlab(x_var) +
ylab(y_var)
}
if(FALSE){
moderationInteractionPlot(use_vars_list[[9]], df, "us_or_international")
}
##########################################
##' Diagnostic Plot for "winning model" as defined by the likelihood ratio test.
##' @param winningModel best model, according to LRT
##' @return Tukey Anscombe plot, Q-Q plot for RE and residuals' Q-Q plot.
##' @author Federico Rogai
diagnosticPlot<-function(winningModel, percent = 1){
if(!{percent> 0 & percent<=1}) stop("percentage has to be between 0 and 1")
model<-winningModel[[3]][[1]]
y_name<-names(model@frame)[[1]]
x_name<-names(model@frame)[[2]]
show_less_points<-sample(length(resid(model)),floor(percent * length(resid(model)))) # Possible to show less points if
# one wants to "see more"
# Fitted values vs residuals
plot(resid(model)[show_less_points]~fitted(model)[show_less_points], xlab=x_name, ylab=y_name, main= "Tukey Anscombe Plot" )
lines(loess.smooth(fitted(model)[show_less_points],resid(model)[show_less_points] , family="gaussian"), col="blue")
# Q-Q plot of random effects
print(qqmath(ranef(model)))
## Q-Q plot of residuals
qqnorm(resid(model)[show_less_points])
}
if(FALSE){
diagnosticPlot(wM)
diagnosticPlot(wM1)
}
###################################
### Functions for replication ###
###################################
use_vars_list <- list(sunk = c("sunkDV", "sunkgroup", "SMD"),
gain_loss = c("gainlossDV", "gainlossgroup", "OR"),
anch_ny_sf = c("anchoring1", "anch1group", "SMD"),
anch_pop_chic = c( "anchoring2", "anch2group", "SMD"),
anch_everest = c( "anchoring3", "anch3group", "SMD"),
anch_babies = c( "anchoring4", "anch4group", "SMD"),
gambfal = c("gambfalDV", "gambfalgroup", "SMD"),
scales = c("scales", "scalesgroup", "OR"),
reciprocity = c("reciprocityus", "reciprocitygroup", "OR"),
allowed_forbidden = c("allowedforbidden", "allowedforbiddenGroup", "OR"),
quote = c("quote", "quoteGroup", "SMD"),
flag = c("flagdv", "flagGroup", "SMD"),
money = c("Sysjust", "MoneyGroup", "SMD"),
imag_cont = c("Imagineddv", "ContactGroup", "SMD"),
math = c("d_art", "sex", "SMD")
)
###########################################################################
## Creates a table showing the standardized mean difference obtained by
## each of the 36 universities that tried to replicate a specific study.
## Note that the SMD calculated by this function contains a bias correction
## and therefore differs slighlty from the values calculated by the authors
## of the original paper and by our function "smd_cohen".
## This function is used in the function "create_overview_tables"
##' @param dat data.frame containing the full data from the manyLabs experiments
##' @param treatment_var Name of the column in dat that contains the treatment
##' assignments.
##' @param response_var Name of the column in dat that contains the response.
##' @return output data.frame with one row for each of the 36 replication attempts.
##' Each row of output contains the sample sizes and means for both treatment groups
##' as well as the the standardized mean difference.
##' @author Lorenz Herger
escalc_inputs_smd <- function(dat, treatment_var, response_var){
dat <- dat[!is.na(dat[[treatment_var]]) &!is.na(dat[[response_var]]), ]
levels_treatment <- sort(unique(dat[[treatment_var]]), decreasing = TRUE) # sort since escalc interprets 1
# as treatmenr 2 as control
labs <- unique(dat$referrer)
input <- data.frame(n1 = numeric(), n2 = numeric(), m1 = numeric(), m2 = numeric(),
sd1 = numeric(), sd2 = numeric())
for (i in labs) {
# Treatment 0 (control)
repl_trt <- dat[dat$referrer == i & dat[[treatment_var]] == levels_treatment[1], ][[response_var]]
input[i,"n1"] <- length(repl_trt)
input[i, "m1"] <- mean(repl_trt)
input[i, "sd1"] <- sd(repl_trt)
# Treatment 1 (treatment)
repl_trt <- dat[dat$referrer == i & dat[[treatment_var]] == levels_treatment[2], ][[response_var]]
input[i,"n2"] <- length(repl_trt)
input[i,"m2"] <- mean(repl_trt)
input[i,"sd2"] <- sd(repl_trt)
}
output <- escalc(measure="SMD", m1i=m1, sd1i=sd1, n1i=n1, m2i=m2, sd2i=sd2, n2i=n2, data=input)
}
########################################################################
## Creates a table showing the odds ratios obtained by each of the 36
## universities that tried to replicate a specific study.