#2014.11.26 (updated: 2014.12.05)
#R Code for DataColada[30] #
#Written by Uri Simonsohn (email: uws@wharton.upenn.edu)
################################################################################################
#Overview
#The code below creates a function that generates studies, then selectively publishes them based either on
#statistical significance or effect size, and then attempts to correct for publication bias using Trim-and-Fill
#
#The function begins generating all studies with sample size distributed uniform between a low and a high value
#It then creates publication bias by only considering p<.05 results. It counts how many were excluded and then
#excludes, INSTEAD of the p<.05, the same % of smaller effect size studies.
#It then computes the simple naive estiamtes based only on published findings (first based on p<.05, then based on
#smaller effect size) and corrects both with Trim-and-Fill.
#
#Trim and Fill performs poorly in both cases.
########################################################################################################
#Libraries needed
library(metafor) #this is the meta-analysis package that contains the trim-and-fill routine
require(data.table) #useful for sorting datasets/
#Function starts here
colada30=function(d,ktot,n0,n1)
{
#Syntax
#d: effect size (assumed to be a fixed effect size)
#ktot: number of studies attempted
#no: small sample size per cell
#n1: big sample size per cell
#Generate sample size of studies attempted
n=round(runif(min=n0,max=n1,n=ktot),0) #uniform distribution of sample size
df=2*n-2 #compute the d.f. for a two sample t-test on those sample sizes
#Draw t-stats from non-central; these are the simulated studies with true effect size d.
t.all=rt(n=ktot,df=df,ncp=sqrt(n/2)*d)
#Convert t-values into observed ds
d.all=t.all*2/sqrt(2*n) #standard formula d=t*2/sqrt(2n)
#Compute variance of d
vard.all= 2/n +(d.all**2)/(2*(n-3.94)) #formula for variance of cohen's d
#compute p-values
p.all=2*(1-pt(t.all,df))
#PUBLICATION BIAS BASED ON P-VALUE
#Publishable subset that's p<.05
d.sig=d.all[p.all<.05]
vard.sig=vard.all[p.all<.05]
#Share of significant results
power=sum(p.all<.05)/ktot
#PUBLICATOIN BIAS BASED ON EFFECT SIZE
#Now that we know power, we are going to exclude the same % of studies based on effect size
#We begin by sorting the simulated studies by observed effect size,which in turn we begin by
#merging the t,d,df,var(d),
#Merge all into data-table,
dt.all=data.table(t.all, d.all, vard.all, p.all,key="d.all")
#We want to drop just as many studies for effect size as for p>.05. If 80% are p<.05 then we drop 20% based on p>.05
#so to make comparison comparable we drop the 20% of smaller effect sizes
k=(1-power)*ktot #k is the number of studies to drop
#Publishable set based on effect size
d.big=dt.all$d.all[(k+1):ktot] #taking it off the datatable so that it is in order
vard.big=dt.all$vard.all[(k+1):ktot]
#META-ANALYSES
#God's meta-analysis, sees all studies attempted
god=rma(yi=d.all,vi=vard.all,method="FE")
#Meta-analysis on subset of studies with big effects
naive.big=rma(yi=d.big,vi=vard.big,method="FE")
#Meta-analysis on subset of studies with p<.05
naive.sig=rma(yi=d.sig,vi=vard.sig,method="FE")
#Trim-and-fill correction for each
tf.big=trimfill(naive.big)
tf.sig=trimfill(naive.sig)
#We are done. Report results
results=round(c(god$b, naive.sig$b, tf.sig$b, naive.big$b, tf.big$b),3)
cat("\n True effect size is d=",d)
cat("\n Sample size (n) is ~uniform between n=",n0," and n=",n1)
cat("\n Power:",100*round(power,2),"%")
cat("\n\n\nGod: ",round(god$b,2))
cat("\n\n Naive estimate of studies with p<.05 :",round(naive.sig$b,2))
cat(" \n Trim and fill correction :",round(tf.sig$b,2))
cat("\n Naive estimate of studies with big effect sizes:",round(naive.big$b,2))
cat("\n Trim and fill correction: :",round(tf.big$b,2))
#plot
par(mfrow=c(1,2))
barplot(results,ylab="Cohen's d",main="Effect size estimates with and without Trim-and-Fill correction",cex.names=.75,
names.arg=c("God\n",
"Naive Case 1\ndropping p>.05",
"Trim-Fill\n(Case 1)",
"Naive Case 2\ndropping small d",
"Trim-Fill\n (Case 2)"))
abline(h=d,col="red")
text(.75,d*1.1,paste0("True Effect:",d),col="red")
text(c(2,3.15,4.45,5.55),.9*results[2:5],results[2:5])
funnel(tf.big,main='Funnel Plot Case 2')
}
set.seed(123)
r1=colada30(d=.25,ktot=1000,n0=10, n1=50)
set.seed(321)
r2=colada30(d=.15,ktot=1000,n0=50,n1=200)