#R code for calculations of DataColada[54] #This code modifies the code for Figure 1 in this post #To compute the expected sample size for the two replications the 90-75-50 heuristic would result in. #Written by Uri Simonsohn (uws@wharton.upenn.edu) #Please email me directly if you see any errors or have any comments. #Last update: 2016 10 31 ############################################## #Load power library library(pwr) #Create two functions for power calculations getn=function(d,power) round(pwr.t.test(d=d,power=power)$n) getpower=function(d,n) round(100*pwr.t.test(d=d,n=n)$power) #Key function, simulate the 7590 heuristic sim7590=function(simtot,n.ori,power=NA,seed,p=NA) { #Syntax: #simtot: number of simulations #n.ori: sample size of original study #power: power of original study #p : p-value of original sutdy #note: either p or power are set, not both. # if power, we have a distribution of original effect # sizes, if p just one possible effect size. #(1)PARAMETERS ORIGINAL STUDY #1.1 Set d #If user entered power if (!is.na(power)) { #If power=.05, true effect is zero if (power==.05) d.true=0 #If power>.05, find true effect size with power formula if (power>.05) d.true=pwr.t.test(n=n.ori,power=power)$d #d that gives n.ori power } #If user entered p-value, d=0 if (is.na(power)) d.true=0 #1.2 degrees of freedom df.ori=2*n.ori-2 #1.3 Seed set.seed(seed) #1.4 Critical t tc=qt(p=.975,df=df.ori) #(2) SIMULATE ORIGINAL #2.1 If user entered power for original study if (!is.na(power)) ts=qt(runif(n=3*(1/power)*simtot),df=df.ori,ncp=sqrt(n.ori/2)*d.true) #Note: i generate more that simtot original studies becuase some will not be significnat # in particular, 1/power get's you the expected share needed to get simtot, and i add 50% to be safe #2.2 if user entered p-value for original study if (is.na(power) && !is.na(p)) ts=rep(qt(1-p/2,df=df.ori),simtot) #2.3 Keep significant ones (first simtot of them) t.sig=ts[ts>=tc] t.sig=t.sig[1:simtot] #2.4 Observed significant effects original research d.ori=2*t.sig/sqrt(2*n.ori) #(3) REPLICATION 1 #3.1 sample sizes computed with 75x90 heuristic for observed effect size n.rep1=mapply(getn,d=d.ori*.75,power=.9) df.rep1=2*n.rep1-2 #3.2 Draw replication 1 t.rep1=qt(runif(n=simtot),df=df.rep1,ncp=sqrt(n.rep1/2)*d.true) #3.3 p-values replication 1 p.rep1=2*(1-pt(abs(t.rep1),df=df.rep1)) sig.rep1=p.rep1<.05 #(4) REPLICATION 2 #4.1 sample sizes n.rep2=ifelse(sig.rep1,n.rep1,mapply(getn,d=d.ori*.5,power=.9)) df.rep2=ifelse(sig.rep1,df.rep1,2*n.rep2-2) #df.rep2=df.rep1 if rep1 is significant #4.2 Draw replication 2 if replication 1 did not "work", otherwise keep 1. t.rep2=ifelse(sig.rep1, t.rep1, qt(runif(n=simtot),df=df.rep2,ncp=sqrt(n.rep2/2)*d.true)) #4.3 p-values replication 2 p.rep2=ifelse(sig.rep1,p.rep1,2*(1-pt(abs(t.rep2),df=df.rep2))) sig.rep2=p.rep2<.05 #4.4 COMBINE 1 & 2 p.both=ifelse(sig.rep1, p.rep1, pnorm((qnorm(p.rep1)+qnorm(p.rep2))/sqrt(2))) n.both=ifelse(n.rep1==n.rep2,n.rep1,n.rep1+n.rep2) #(5) Print results cat("\nMean n1",mean(n.rep1)) cat("\nMean n2 (subset)",mean(n.rep2[n.rep2>n.rep1])) cat("\nMean both (total)",mean(n.both)) cat("\nShare p<.05 1st",sum(sig.rep1)/simtot) cat("\nShare p<.05 1st or 2nd",sum(sig.rep2)/simtot) cat("\nShare p<.05 stouffer",sum(p.both<.05)/simtot) #(6) save results results=list(n.rep1=n.rep1, n.rep2=n.rep2, sig.rep1=sig.rep1, sig.rep2=sig.rep2, p.rep1=p.rep1, p.rep2=p.rep2) results } #First examle, n=20, false-positive, n1=67; then n2=151 s1=sim7590(simtot=5000,n.ori=20,power=.05,seed=1074) #General, about 11 times as many s2=sim7590(simtot=1000,n.ori=50,power=.05,seed=1075) s3=sim7590(simtot=1000,n.ori=100,power=.05,seed=1076) s4=sim7590(simtot=1000,n.ori=250,power=.05,seed=1077) hist(s1$n.rep2) #p-hacked to .049, about 14 times s5=sim7590(simtot=1000,n.ori=20,p=.049,seed=1078) s6=sim7590(simtot=1000,n.ori=50,p=.049,seed=1079) s6=sim7590(simtot=1000,n.ori=100,p=.049,seed=1080)