#R code for calculations of DataColada[54], properties of the "75x90" procedure to set sample size
#This code builds on code originally written for DataColada[4]
##Written by Uri Simonsohn (uws@wharton.upenn.edu)
#Please email me directly if you see any errors or have any comments.
#Last update: 2016 10 26
################################################################################
#Note: the program does not rely on simulations, instead, it approximates the distribution
# of expected effect sizes by evaluating the non-central distribution at every
# percentile between the level of power and 100%. It is a discretization every 1/10%
#OUTLINE
#(1) Basic functions
#(2) Key program that gets the distribution
#(3) Reproduce results from Colada[4], use observed effect size for replication
#(4) Figure in Colada[54] - use 75% of observed effect size, with 90% power
#4.1 Calculations
#4.2 Figure in blogpost
#4.3 In text, what if original has 35% power?
#4.4 In text, SD(n)/E(n)
library(pwr) #Need to install this package for power calculations
#(1) Basic functions
#Create simpler functions to compute power
#1.1 Function getn() gets sample size for a given effect size and desired power
getn = function(d,power) {
n=pwr.t.test(d=d,power=power)$n
return(round(n,digits=0))
}
#1.2Function getpower() for given sample and effect size
getpower = function(d,n) {return(pwr.t.test(d=d,n=n)$power)}
#(2) Main program: computes expected consequences in replication
# of setting sample size based on observed effect size
realpower=function(nori,powerori,powerrep,fraction=1) {
#SYNTAX:
#nori: per cell sample size of original study (comparing two means)
#powerori: true power of original study
#powerrep: claimed power of replication
#fraction: whats the original estimate multiplied by when computing power
# default is 1, meaning observed effect size is used.
#2.1 compute df, ncp, tc, true effect size
dfori=2*nori-2 #degrees of freedom
dori=pwr.t.test(n=nori,power=powerori)$d #true effect size leading to the set power for the original study given its sample size
ncpori=sqrt(nori)*dori #noncentrality parameter for student distribution given true effect and original sample size
tc=qt(.975,dfori) #critical t-value for p=.05 given original sample size
#2.2 generate distribution of (p<.05) original studies
#the way I do this is to get the distribution of possible t-values obtained, I start creating a vector with percentiles between 1-power and 99.9%
rp=seq(from=1-powerori+.001, to=.999,by=.001) #quantiles to be used in the non-central distribution below
#I then find the noncentral t-values associated with each of those percentiles, effectively the noncentral distribution every 1/1000 point
t_publish=qt(p=rp,df=dfori,ncp=sqrt(nori/2)*dori)
#convert that distribution of t-values into effect sizes
d_publish=(2*t_publish)/sqrt(dfori)
#This is a discretized approximateion of the distribution of effect size | p<.05, power and nori
#2.3 find nrep for powerrep
#for each of the possible values obtained, I compute the sample size a replicator would set seeking the claimed level of power
nrep=mapply(getn,d=d_publish*fraction,power=powerrep) #nrep: sample size of replication
#2.4 find actual power
#the true level of power is different from the published, that's why actual power is not claimed power, i here compute actual power
real=mapply(getpower,d=dori,n=nrep) #for every sample size run by the replicator, i compute the effective power
m=mean(real) #every sample size in the vector is equally likely (by construction, becuase of line 38 of code above) so the simple mean
#is the expected value of power
cat("Mean power: ",m,"\n")
return(list(real=real,nrep=nrep))
#So the list contains the
#$real which is the vector of actual power, and
#$nrep, the vecotor of replication sample sizes
}
#(3) Reproduce results from Colada[4], use observed effect size for replication
r1=realpower(nori=20,powerori=.5,powerrep=.80,fraction=1)
r2=realpower(nori=20,powerori=.5,powerrep=.90,fraction=1)
r3=realpower(nori=20,powerori=.5,powerrep=.95,fraction=1)
#These is the first Figure in DataColada.org/4, 51%, 62%, 70% power when claiming 80,90,95
#(4) Figure in Colada[54] - use 75% of observed effect size, with 90% power
#4.1 Calculations
r4=realpower(nori=50,powerori=.5,powerrep=.90,fraction=.75)
#4.2 Figure in blogpost
hist(r4$nrep,
breaks=20,
main="The roulette wheel setting sample size for theSample sizes based on 75% of observed effect size are noisy",
xlab='Replication sample size',
ylim=c(0,42))
abline(v=125,col='red',lty=2,lwd=3)
text(125,38,"Small Telescopes: n=125 (always)", col='red', cex=.8,pos=4)
mean(r4$nrep)
#4.3 In text, what if original has 35% power?
r45=realpower(nori=50,powerori=.35,powerrep=.90,fraction=.75)
#4.4 In text, SD(n)/E(n)
sd(r4$nrep)/mean(r4$nrep)
#5. Footnote about sample size not mattering, for all sample sizes, similar SD/E given power
#5.1 Run it
#Power:50%,
r5=realpower(nori=50,powerori=.5,powerrep=.90,fraction=.75)
r6=realpower(nori=200,powerori=.5,powerrep=.90,fraction=.75)
r7=realpower(nori=500,powerori=.5,powerrep=.90,fraction=.75)
#Power:80%
r8=realpower(nori=50,powerori=.80,powerrep=.90,fraction=.75)
r9=realpower(nori=200,powerori=.80,powerrep=.90,fraction=.75)
r10=realpower(nori=500,powerori=.80,powerrep=.90,fraction=.75)
#5.2 Compute SD/E
#Power:50%
sd(r5$nrep)/mean(r5$nrep)
sd(r6$nrep)/mean(r6$nrep)
sd(r7$nrep)/mean(r7$nrep)
#Power:80%
sd(r8$nrep)/mean(r8$nrep)
sd(r9$nrep)/mean(r9$nrep)
sd(r10$nrep)/mean(r10$nrep)