*(1) Load files provided by Larry Sanna;
/* Folder that holds the Exp2, Exp3 and Exp4 data sets.
   Change this single line to point at a different file location. */
%let sanna_dir = C:\Uri\Research\Fake data\Sanna\Data;

/* Each step reads one experiment by physical path and copies the
   Height variable into cond, the name used for the condition
   variable in the steps that follow. */
data s2;
    set "&sanna_dir.\Exp2";
    cond = Height;
run;

data s3;
    set "&sanna_dir.\Exp3";
    cond = Height;
run;

data s4;
    set "&sanna_dir.\Exp4";
    cond = Height;
run;
*note: study 4 has both the measure of # of fish returned and participant mood. In these simulations, because I am bootstrapping
individual participants, I can include both. In the normal-distribution-based ones I only include # of fish, because there the correlation with
mood would need to be taken into account;
*(2) Compute the mean of each DV by condition and add it as a new variable, so that it can be subtracted later;
*note: this uses a small macro I wrote that adds to the dataset, as a new variable, the group average of another variable;
%addmeanby(s2,time,cond);
%addmeanby(s3,hotsauce,cond);
%addmeanby(s4,fishret,cond);
%addmeanby(s4,avmood,cond);

*(3) Subtract condition means from each variable to bootstrap variances as in Boos & Brownie (Technometrics 1989) - V31(1) p.69-82 "Bootstrap methods for testing homogeneity of variances";
/* The mean variables used below (meantime, meanhotsauce, meanfishret,
   meanavmood) are expected to have been added by the macro calls above. */
data s2;
    set s2;
    time = time - meantime;
run;

data s3;
    set s3;
    hotsauce = hotsauce - meanhotsauce;
run;

data s4;
    set s4;
    fishret = fishret - meanfishret;
    avmood  = avmood  - meanavmood;
run;
*(4) GENERATE 100,000 BOOTSTRAPPED SAMPLES;
*note: I am drawing from the entire pool of observations of each experiment, i.e., under the null that the three distributions are the same except for mean differences.
In an earlier version of these analyses, shared with UNC's investigative committee around January 2012, I was drawing from each condition separately. At the time I was not aware of
the Boos & Brownie solution of subtracting means. The results are quite similar either way, but when sampling from each condition separately, as in the previous version,
the distribution of p-values won't be uniform under the null that the sample variances are the same;
/* Draw bootstrap replicates for one simulated condition of one study.

     src     = input data set (mean-centered observations of a whole experiment)
     out     = output data set, one row per drawn observation
     n       = number of observations drawn per replicate
     seed    = random-number seed handed to PROC SURVEYSELECT
     renames = old=new pairs that give the DV(s) a condition-specific name
     reps    = number of bootstrap replicates (default 100000)

   Sampling is with replacement (method=urs) and OUTHITS keeps every hit
   as its own row. There is deliberately no WHERE filter on cond: every
   simulated condition is drawn from the entire pool of the experiment.

   i numbers the rows 1..n and restarts every n rows; it is the within-
   replicate row index used later as a merge key. This relies on each
   replicate contributing exactly n consecutive rows to the output. */
%macro bootsample(src=, out=, n=, seed=, renames=, reps=100000);
    proc surveyselect data=&src out=&out method=urs sampsize=&n
                      noprint outhits seed=&seed rep=&reps;
    run;

    data &out;
        set &out;
        rename &renames;
        i = mod(_N_, &n);
        if i = 0 then i = &n;
    run;
%mend bootsample;

/* Study 2 (DV time), 20 observations per condition. */
%bootsample(src=s2, out=cond21, n=20, seed=100, renames=%str(time=time1));
%bootsample(src=s2, out=cond22, n=20, seed=110, renames=%str(time=time2));
%bootsample(src=s2, out=cond23, n=20, seed=120, renames=%str(time=time3));

/* Study 3 (DV hotsauce), 15 observations per condition. */
%bootsample(src=s3, out=cond31, n=15, seed=130, renames=%str(hotsauce=hs1));
%bootsample(src=s3, out=cond32, n=15, seed=140, renames=%str(hotsauce=hs2));
%bootsample(src=s3, out=cond33, n=15, seed=150, renames=%str(hotsauce=hs3));

/* Study 4 (DVs fishret and avmood), 15 observations per condition. */
%bootsample(src=s4, out=cond41, n=15, seed=160, renames=%str(fishret=fish1 avmood=avmood1));
%bootsample(src=s4, out=cond42, n=15, seed=170, renames=%str(fishret=fish2 avmood=avmood2));
%bootsample(src=s4, out=cond43, n=15, seed=180, renames=%str(fishret=fish3 avmood=avmood3));
*(4) MERGE ALL SIMULATIONS FOR THE THREE STUDIES INTO A SINGLE FILE WHERE EACH ROW IS AN OBSERVATION IN A SIMULATION AND EACH COLUMN IS A VARIABLE*CONDITION IN A STUDY;
*STUDIES WITH 15 OBS HAVE MISSING ENTRIES FOR ROWS i=16 TO 20 OF EACH REPLICATE;
*Note: I create a single matrix to make it more efficient when computing means;
data sim2;
    merge cond21 cond22 cond23
          cond31 cond32 cond33
          cond41 cond42 cond43;
    by replicate i;
    *drop variables created in the bootstrap and those not to be used later;
    drop Numberhits cond height minutes seconds sectominutes;
run;
*(5) SUMMARY STATS;
*For every simulation, compute the SD of each condition in each of the three studies;
proc means data=sim2 noprint;
    by replicate;
    var time1-time3
        hs1-hs3
        fish1-fish3
        avmood1-avmood3;
    *This saves the standard deviations into a file called m, each under the name of its source variable;
    output out=m std=;
run;
*rename SDs to match the same layout as in the normal distribution based simulations;
/* sd1-sd3   = time (3 conditions)
   sd4-sd6   = hotsauce
   sd7-sd9   = fish returned
   sd10-sd12 = mood, which was not used in the normal distribution based simulations */
data m;
    set m(rename=(
        time1=sd1     time2=sd2     time3=sd3
        hs1=sd4       hs2=sd5       hs3=sd6
        fish1=sd7     fish2=sd8     fish3=sd9
        avmood1=sd10  avmood2=sd11  avmood3=sd12
    ));
run;
*(6) CARRY OUT FINAL COMPUTATIONS TO AGGREGATE SDS ACROSS CONDITIONS;
data m1;
    set m;

    *Pooled standard deviation by study (SDP): square root of the df-weighted average of the condition variances (SD**2);
    *first study has n=20 per condition, so n-1=19 and 3*19=57 df in total;
    sdp_s1  = ((19*sd1**2  + 19*sd2**2  + 19*sd3**2 ) / 57)**0.5;
    *remaining DVs have n=15 per condition, so n-1=14 and 3*14=42 df in total;
    sdp_s2  = ((14*sd4**2  + 14*sd5**2  + 14*sd6**2 ) / 42)**0.5;
    sdp_s3  = ((14*sd7**2  + 14*sd8**2  + 14*sd9**2 ) / 42)**0.5;
    sdp_s3b = ((14*sd10**2 + 14*sd11**2 + 14*sd12**2) / 42)**0.5;

    *Standard error of the pooled SD by study, using SE(SD)=SD/sqrt(2n) with n=20 or n=15;
    sep_s1  = sdp_s1  / 40**0.5;
    sep_s2  = sdp_s2  / 30**0.5;
    sep_s3  = sdp_s3  / 30**0.5;
    sep_s3b = sdp_s3b / 30**0.5;

    *psi: SD of the three condition SDs of a study, divided by SE(SDP), rounded to 4 decimals;
    psi1  = round(std(sd1,  sd2,  sd3 ) / sep_s1,  0.0001);
    psi2  = round(std(sd4,  sd5,  sd6 ) / sep_s2,  0.0001);
    psi3  = round(std(sd7,  sd8,  sd9 ) / sep_s3,  0.0001);
    psi3b = round(std(sd10, sd11, sd12) / sep_s3b, 0.0001);

    *aggregate across studies via a simple average of psi;
    *three DVs, analogous to the normal distribution simulations, excludes mood;
    psiall3 = round(mean(psi1, psi2, psi3), 0.0001);
    *all 4 variables, including mood;
    psiall4 = round(mean(psi1, psi2, psi3, psi3b), 0.0001);
run;
*compute p-values from the simulated psi values stored in m1;
*NOTE(review): the pvalue macro is defined outside this section. Its third argument is presumably the psi observed in the original data, to be compared against the simulated distribution - confirm against the macro definition;
* Each study separately (not in paper). The mood-only statistic psi3b is not evaluated on its own here;
%pvalue(m1,psi1, .2378);
%pvalue(m1,psi2, .1168);
%pvalue(m1,psi3, .1666);
* all 3 studies with 4 DVs, i.e. psiall4 which includes mood (main text);
%pvalue(m1,psiall4,.1802);
* all 3 studies without mood, i.e. psiall3 (footnote);
%pvalue(m1,psiall3,.1737);