Reproducibility in different acquisition settings - Statistics
Contents
Reproducibility in different acquisition settings - Statistics#
Reproducibility metrics analysis#
suppressWarnings(suppressMessages(library("car")))
suppressWarnings(suppressMessages(library("sjstats")))
suppressWarnings(suppressMessages(library("broom")))
suppressWarnings(suppressMessages(library("multcomp")))
library("car")
library("broom")
library("multcomp")
library("sjstats")
library("xtable")
path_resources <- file.path(getwd(), "..", "..", "resources", "data")
path_col <- file.path(path_resources, "r2_repository_and_site_adult_a2009s.csv")
data_col <- read.csv(path_col)
data_col$repository_and_site <- as.factor(data_col$repository_and_site)
map_aov <- list("r_square"=aov(r_square ~ repository_and_site + age + snr_total, data = data_col),
"ICC"=aov(ICC ~ repository_and_site + age + snr_total, data = data_col))
for (rep_measure in c("r_square", "ICC")){
print(sprintf("## ##################### %s ################################", rep_measure))
my_anova <- map_aov[[rep_measure]]
print(Anova(my_anova, type = "III"))
pos_hoc <- summary(glht(my_anova, linfct = mcp(repository_and_site="Tukey")))
latex_table_name = sprintf("adult_a2009s_%s_first_level.txt", rep_measure)
latex_table_name_posthoc = sprintf("adult_a2009s_%s_first_level_posthoc.txt", rep_measure)
df = anova_stats(my_anova)[, c("df", "sumsq", "statistic", "p.value", "cohens.f")]
print(xtable(df, caption = sprintf("adult III %s", rep_measure)), file=latex_table_name)
print(pos_hoc)
}
[1] "## ##################### r_square ################################"
Anova Table (Type III tests)
Response: r_square
Sum Sq Df F value Pr(>F)
(Intercept) 0.4743 1 86.8504 < 2.2e-16 ***
repository_and_site 5.6296 3 343.6292 < 2.2e-16 ***
age 0.0074 1 1.3638 0.2431
snr_total 0.2305 1 42.2002 1.273e-10 ***
Residuals 5.6958 1043
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: aov(formula = r_square ~ repository_and_site + age + snr_total,
data = data_col)
Linear Hypotheses:
Estimate Std. Error t value Pr(>|t|)
IXI|HH - IXI|Guys == 0 -0.075537 0.008529 -8.857 < 1e-04 ***
IXI|IOP - IXI|Guys == 0 -0.346548 0.011046 -31.372 < 1e-04 ***
OASIS3 - IXI|Guys == 0 0.062017 0.015593 3.977 0.000309 ***
IXI|IOP - IXI|HH == 0 -0.271011 0.010633 -25.489 < 1e-04 ***
OASIS3 - IXI|HH == 0 0.137554 0.020635 6.666 < 1e-04 ***
OASIS3 - IXI|IOP == 0 0.408565 0.021678 18.847 < 1e-04 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Adjusted p values reported -- single-step method)
[1] "## ##################### ICC ################################"
Anova Table (Type III tests)
Response: ICC
Sum Sq Df F value Pr(>F)
(Intercept) 0.8917 1 282.399 < 2.2e-16 ***
repository_and_site 3.0133 3 318.103 < 2.2e-16 ***
age 0.0022 1 0.700 0.403
snr_total 0.1572 1 49.781 3.129e-12 ***
Residuals 3.2933 1043
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: aov(formula = ICC ~ repository_and_site + age + snr_total, data = data_col)
Linear Hypotheses:
Estimate Std. Error t value Pr(>|t|)
IXI|HH - IXI|Guys == 0 -0.062312 0.006485 -9.608 <1e-04 ***
IXI|IOP - IXI|Guys == 0 -0.255536 0.008400 -30.422 <1e-04 ***
OASIS3 - IXI|Guys == 0 0.056503 0.011857 4.765 <1e-04 ***
IXI|IOP - IXI|HH == 0 -0.193224 0.008085 -23.899 <1e-04 ***
OASIS3 - IXI|HH == 0 0.118814 0.015691 7.572 <1e-04 ***
OASIS3 - IXI|IOP == 0 0.312039 0.016484 18.930 <1e-04 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Adjusted p values reported -- single-step method)
Analysis of age effect on reproducibility metrics#
sites <- c("IXI|Guys", "IXI|HH", "IXI|IOP", "OASIS3")
site_combinations <- combn(sites, 2)
for (rep_measure in c("r_square", "ICC")){
print(sprintf("####################### %s ################################", rep_measure))
equation = sprintf("%s ~ age + snr_total", rep_measure)
var_name = "age"
t_values_list <- c()
p_values_list <- c()
cohen_d_list <- c()
comb_list <- c()
for (i in c(1, 2, 3, 4)){
site1 = site_combinations[1, i]
site2 = site_combinations[2, i]
n1 = sum(data_col$repository_and_site == site1)
n2 = sum(data_col$repository_and_site == site2)
degrees_freedom = n1+n1-2
lmsite1 <- lm(data=data_col, equation, subset = repository_and_site==site1)
lmsite2 <- lm(data=data_col, equation, subset = repository_and_site==site2)
for (var_name in c("age")){
degrees_freedom = n1+n1-2
m1 = summary(lmsite1)[["coefficients"]][var_name,"Estimate"]
m2 = summary(lmsite2)[["coefficients"]][var_name,"Estimate"]
s1 = summary(lmsite1)[["coefficients"]][var_name, "Std. Error"]
s2 = summary(lmsite2)[["coefficients"]][var_name, "Std. Error"]
mean_diff = m1 - m2
sp = (((n1-1)*(s1**2) + (n2-1)*(s2**2))/(n1+n2-2))**0.5
cohen_d <- mean_diff/sp
dem_samples = (1/n1+1/n2)**0.5
t = mean_diff/(sp*dem_samples)
p_value <- pt(-abs(t),degrees_freedom)
t_critical_value <- qt(1-.05/2, degrees_freedom)
#print(sprintf("\n----------- %s Slope: Independent t-test -----------\n", var_name))
#print(sprintf("%s-%s", site1, site2))
print(sprintf("%s-%s,ndegree,%d,diff,%f,t-value,%f,t-critical-value,%f,p-value,%e,cohen_d,%f", site1, site2, n1+n2-2, mean_diff, t, t_critical_value, p_value*4, cohen_d))
#cat("Cohen's d effect %d", cohen_d)
}
}
df <- data.frame("combination" = comb_list, "t" = t_values_list, "p-values" = p_values_list, "cohen-d" = cohen_d_list)
print(df)
for (site_name in sites){
lm_model <- lm(data=data_col, equation, subset = repository_and_site==site_name)
#print(sprintf("------ %s ------", site_name))
print(summary(lm_model))
}
}
[1] "####################### r_square ################################"
[1] "IXI|Guys-IXI|HH,ndegree,489,diff,-0.000526,t-value,-18.621937,t-critical-value,1.963785,p-value,1.517107e-61,cohen_d,-1.746071"
[1] "IXI|Guys-IXI|IOP,ndegree,377,diff,-0.000474,t-value,-9.608958,t-critical-value,1.963785,p-value,3.560140e-20,cohen_d,-1.293842"
[1] "IXI|Guys-OASIS3,ndegree,801,diff,-0.001824,t-value,-72.635786,t-critical-value,1.963785,p-value,1.024756e-305,cohen_d,-5.258845"
[1] "IXI|HH-IXI|IOP,ndegree,244,diff,0.000052,t-value,0.704264,t-critical-value,1.966650,p-value,9.634575e-01,cohen_d,0.100865"
data frame with 0 columns and 0 rows
Call:
lm(formula = equation, data = data_col, subset = repository_and_site ==
site_name)
Residuals:
Min 1Q Median 3Q Max
-0.30187 -0.03016 0.00730 0.03737 0.13859
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.6853462 0.0618708 11.077 < 2e-16 ***
age -0.0008522 0.0002197 -3.878 0.000128 ***
snr_total 0.0035083 0.0037393 0.938 0.348863
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.05494 on 309 degrees of freedom
Multiple R-squared: 0.07343, Adjusted R-squared: 0.06743
F-statistic: 12.24 on 2 and 309 DF, p-value: 7.631e-06
Call:
lm(formula = equation, data = data_col, subset = repository_and_site ==
site_name)
Residuals:
Min 1Q Median 3Q Max
-0.54301 -0.03577 0.00979 0.05212 0.15078
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1657761 0.1145790 1.447 0.150
age -0.0003266 0.0004056 -0.805 0.422
snr_total 0.0299952 0.0063004 4.761 4.01e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.08304 on 176 degrees of freedom
Multiple R-squared: 0.1487, Adjusted R-squared: 0.139
F-statistic: 15.37 on 2 and 176 DF, p-value: 7.043e-07
Call:
lm(formula = equation, data = data_col, subset = repository_and_site ==
site_name)
Residuals:
Min 1Q Median 3Q Max
-0.19328 -0.07797 0.01107 0.05633 0.22494
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0239951 0.2435459 0.099 0.922
age -0.0003786 0.0007334 -0.516 0.607
snr_total 0.0223824 0.0146260 1.530 0.131
Residual standard error: 0.09817 on 64 degrees of freedom
Multiple R-squared: 0.0369, Adjusted R-squared: 0.006805
F-statistic: 1.226 on 2 and 64 DF, p-value: 0.3002
Call:
lm(formula = equation, data = data_col, subset = repository_and_site ==
site_name)
Residuals:
Min 1Q Median 3Q Max
-0.61705 -0.04036 0.01048 0.05118 0.15027
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3700490 0.0614341 6.024 3.36e-09 ***
age 0.0009719 0.0004075 2.385 0.0175 *
snr_total 0.0228540 0.0049226 4.643 4.43e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.07574 on 488 degrees of freedom
Multiple R-squared: 0.044, Adjusted R-squared: 0.04008
F-statistic: 11.23 on 2 and 488 DF, p-value: 1.705e-05
[1] "####################### ICC ################################"
[1] "IXI|Guys-IXI|HH,ndegree,489,diff,-0.000431,t-value,-19.559881,t-critical-value,1.963785,p-value,1.840020e-66,cohen_d,-1.834016"
[1] "IXI|Guys-IXI|IOP,ndegree,377,diff,-0.000101,t-value,-2.705734,t-critical-value,1.963785,p-value,1.400439e-02,cohen_d,-0.364326"
[1] "IXI|Guys-OASIS3,ndegree,801,diff,-0.001232,t-value,-66.994856,t-critical-value,1.963785,p-value,2.375112e-286,cohen_d,-4.850441"
[1] "IXI|HH-IXI|IOP,ndegree,244,diff,0.000330,t-value,5.436983,t-critical-value,1.966650,p-value,2.016726e-07,cohen_d,0.778685"
data frame with 0 columns and 0 rows
Call:
lm(formula = equation, data = data_col, subset = repository_and_site ==
site_name)
Residuals:
Min 1Q Median 3Q Max
-0.221405 -0.017658 0.005675 0.025261 0.085181
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.8014136 0.0395384 20.269 <2e-16 ***
age -0.0003206 0.0001404 -2.283 0.0231 *
snr_total 0.0026101 0.0023896 1.092 0.2756
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.03511 on 309 degrees of freedom
Multiple R-squared: 0.03455, Adjusted R-squared: 0.0283
F-statistic: 5.53 on 2 and 309 DF, p-value: 0.00437
Call:
lm(formula = equation, data = data_col, subset = repository_and_site ==
site_name)
Residuals:
Min 1Q Median 3Q Max
-0.63321 -0.02411 0.00857 0.03642 0.11312
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3557039 0.0967595 3.676 0.000314 ***
age 0.0001105 0.0003426 0.323 0.747392
snr_total 0.0255363 0.0053205 4.800 3.38e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.07013 on 176 degrees of freedom
Multiple R-squared: 0.1275, Adjusted R-squared: 0.1176
F-statistic: 12.86 on 2 and 176 DF, p-value: 6.138e-06
Call:
lm(formula = equation, data = data_col, subset = repository_and_site ==
site_name)
Residuals:
Min 1Q Median 3Q Max
-0.198372 -0.049290 0.006181 0.041516 0.157927
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.2974941 0.1957221 1.520 0.133
age -0.0002195 0.0005894 -0.372 0.711
snr_total 0.0183113 0.0117540 1.558 0.124
Residual standard error: 0.07889 on 64 degrees of freedom
Multiple R-squared: 0.03703, Adjusted R-squared: 0.006933
F-statistic: 1.23 on 2 and 64 DF, p-value: 0.299
Call:
lm(formula = equation, data = data_col, subset = repository_and_site ==
site_name)
Residuals:
Min 1Q Median 3Q Max
-0.70288 -0.02337 0.00782 0.03242 0.10325
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5569727 0.0459679 12.117 < 2e-16 ***
age 0.0009114 0.0003049 2.989 0.00294 **
snr_total 0.0191092 0.0036833 5.188 3.12e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.05667 on 488 degrees of freedom
Multiple R-squared: 0.05589, Adjusted R-squared: 0.05202
F-statistic: 14.45 on 2 and 488 DF, p-value: 8.039e-07