Reproducibility in different acquisition settings - Statistics#

Reproducibility metrics analysis#

suppressWarnings(suppressMessages(library("car")))
suppressWarnings(suppressMessages(library("sjstats")))
suppressWarnings(suppressMessages(library("broom")))
suppressWarnings(suppressMessages(library("multcomp")))

library("car")
library("broom")
library("multcomp")
library("sjstats")
library("xtable")

path_resources <- file.path(getwd(), "..", "..", "resources", "data")

path_col <- file.path(path_resources, "r2_repository_and_site_adult_a2009s.csv")
data_col <- read.csv(path_col)
data_col$repository_and_site <- as.factor(data_col$repository_and_site)

map_aov <- list("r_square"=aov(r_square ~ repository_and_site + age + snr_total, data = data_col), 
                "ICC"=aov(ICC ~ repository_and_site + age + snr_total, data = data_col))


for (rep_measure in c("r_square", "ICC")){
  
  print(sprintf("## #####################      %s      ################################", rep_measure))  
  
  my_anova <- map_aov[[rep_measure]]
  print(Anova(my_anova, type = "III"))
  pos_hoc <- summary(glht(my_anova, linfct = mcp(repository_and_site="Tukey")))
    
  latex_table_name = sprintf("adult_a2009s_%s_first_level.txt", rep_measure)
  latex_table_name_posthoc = sprintf("adult_a2009s_%s_first_level_posthoc.txt", rep_measure)
  df = anova_stats(my_anova)[, c("df", "sumsq", "statistic", "p.value", "cohens.f")]
  print(xtable(df, caption = sprintf("adult III %s", rep_measure)), file=latex_table_name)
  print(pos_hoc)
}
[1] "## #####################      r_square      ################################"
Anova Table (Type III tests)

Response: r_square
                    Sum Sq   Df  F value    Pr(>F)    
(Intercept)         0.4743    1  86.8504 < 2.2e-16 ***
repository_and_site 5.6296    3 343.6292 < 2.2e-16 ***
age                 0.0074    1   1.3638    0.2431    
snr_total           0.2305    1  42.2002 1.273e-10 ***
Residuals           5.6958 1043                       
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

	 Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Tukey Contrasts


Fit: aov(formula = r_square ~ repository_and_site + age + snr_total, 
    data = data_col)

Linear Hypotheses:
                         Estimate Std. Error t value Pr(>|t|)    
IXI|HH - IXI|Guys == 0  -0.075537   0.008529  -8.857  < 1e-04 ***
IXI|IOP - IXI|Guys == 0 -0.346548   0.011046 -31.372  < 1e-04 ***
OASIS3 - IXI|Guys == 0   0.062017   0.015593   3.977 0.000309 ***
IXI|IOP - IXI|HH == 0   -0.271011   0.010633 -25.489  < 1e-04 ***
OASIS3 - IXI|HH == 0     0.137554   0.020635   6.666  < 1e-04 ***
OASIS3 - IXI|IOP == 0    0.408565   0.021678  18.847  < 1e-04 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Adjusted p values reported -- single-step method)

[1] "## #####################      ICC      ################################"
Anova Table (Type III tests)

Response: ICC
                    Sum Sq   Df F value    Pr(>F)    
(Intercept)         0.8917    1 282.399 < 2.2e-16 ***
repository_and_site 3.0133    3 318.103 < 2.2e-16 ***
age                 0.0022    1   0.700     0.403    
snr_total           0.1572    1  49.781 3.129e-12 ***
Residuals           3.2933 1043                      
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

	 Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Tukey Contrasts


Fit: aov(formula = ICC ~ repository_and_site + age + snr_total, data = data_col)

Linear Hypotheses:
                         Estimate Std. Error t value Pr(>|t|)    
IXI|HH - IXI|Guys == 0  -0.062312   0.006485  -9.608   <1e-04 ***
IXI|IOP - IXI|Guys == 0 -0.255536   0.008400 -30.422   <1e-04 ***
OASIS3 - IXI|Guys == 0   0.056503   0.011857   4.765   <1e-04 ***
IXI|IOP - IXI|HH == 0   -0.193224   0.008085 -23.899   <1e-04 ***
OASIS3 - IXI|HH == 0     0.118814   0.015691   7.572   <1e-04 ***
OASIS3 - IXI|IOP == 0    0.312039   0.016484  18.930   <1e-04 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Adjusted p values reported -- single-step method)

Analysis of age effect on reproducibility metrics#

sites <- c("IXI|Guys", "IXI|HH", "IXI|IOP", "OASIS3")
site_combinations <- combn(sites, 2)


for (rep_measure in c("r_square", "ICC")){

  print(sprintf("#######################      %s      ################################", rep_measure))  
  equation = sprintf("%s ~ age  + snr_total", rep_measure)
  
  var_name = "age"
  
  t_values_list <- c()
  p_values_list <- c()
  cohen_d_list <- c()
  comb_list <- c()
  
  for (i in c(1, 2, 3, 4)){
      
    site1 = site_combinations[1, i]
    site2 = site_combinations[2, i]
    
    n1 =  sum(data_col$repository_and_site == site1)
    n2 = sum(data_col$repository_and_site == site2)
    
    degrees_freedom = n1+n1-2
    
    lmsite1 <- lm(data=data_col, equation, subset = repository_and_site==site1)
    lmsite2 <- lm(data=data_col, equation, subset = repository_and_site==site2)
      
              
      for (var_name in c("age")){

          degrees_freedom = n1+n1-2

          m1 = summary(lmsite1)[["coefficients"]][var_name,"Estimate"]
          m2 = summary(lmsite2)[["coefficients"]][var_name,"Estimate"]

          s1 =  summary(lmsite1)[["coefficients"]][var_name, "Std. Error"] 
          s2 =  summary(lmsite2)[["coefficients"]][var_name, "Std. Error"] 
          

          mean_diff = m1 - m2

          sp = (((n1-1)*(s1**2) + (n2-1)*(s2**2))/(n1+n2-2))**0.5
          cohen_d <- mean_diff/sp

          dem_samples = (1/n1+1/n2)**0.5
          t = mean_diff/(sp*dem_samples)

          p_value <- pt(-abs(t),degrees_freedom)

          t_critical_value <- qt(1-.05/2, degrees_freedom)

          #print(sprintf("\n----------- %s  Slope: Independent t-test -----------\n", var_name))
          #print(sprintf("%s-%s", site1, site2))
          print(sprintf("%s-%s,ndegree,%d,diff,%f,t-value,%f,t-critical-value,%f,p-value,%e,cohen_d,%f", site1, site2, n1+n2-2, mean_diff, t, t_critical_value, p_value*4, cohen_d))
          #cat("Cohen's d effect %d", cohen_d)
          }
  }
  df <- data.frame("combination" = comb_list, "t" = t_values_list, "p-values" = p_values_list, "cohen-d" = cohen_d_list)
  print(df)

for (site_name in sites){
  lm_model <- lm(data=data_col, equation, subset = repository_and_site==site_name)
  #print(sprintf("------ %s ------", site_name))
  print(summary(lm_model))
}
}
[1] "#######################      r_square      ################################"
[1] "IXI|Guys-IXI|HH,ndegree,489,diff,-0.000526,t-value,-18.621937,t-critical-value,1.963785,p-value,1.517107e-61,cohen_d,-1.746071"
[1] "IXI|Guys-IXI|IOP,ndegree,377,diff,-0.000474,t-value,-9.608958,t-critical-value,1.963785,p-value,3.560140e-20,cohen_d,-1.293842"
[1] "IXI|Guys-OASIS3,ndegree,801,diff,-0.001824,t-value,-72.635786,t-critical-value,1.963785,p-value,1.024756e-305,cohen_d,-5.258845"
[1] "IXI|HH-IXI|IOP,ndegree,244,diff,0.000052,t-value,0.704264,t-critical-value,1.966650,p-value,9.634575e-01,cohen_d,0.100865"
data frame with 0 columns and 0 rows

Call:
lm(formula = equation, data = data_col, subset = repository_and_site == 
    site_name)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.30187 -0.03016  0.00730  0.03737  0.13859 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.6853462  0.0618708  11.077  < 2e-16 ***
age         -0.0008522  0.0002197  -3.878 0.000128 ***
snr_total    0.0035083  0.0037393   0.938 0.348863    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.05494 on 309 degrees of freedom
Multiple R-squared:  0.07343,	Adjusted R-squared:  0.06743 
F-statistic: 12.24 on 2 and 309 DF,  p-value: 7.631e-06


Call:
lm(formula = equation, data = data_col, subset = repository_and_site == 
    site_name)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.54301 -0.03577  0.00979  0.05212  0.15078 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.1657761  0.1145790   1.447    0.150    
age         -0.0003266  0.0004056  -0.805    0.422    
snr_total    0.0299952  0.0063004   4.761 4.01e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.08304 on 176 degrees of freedom
Multiple R-squared:  0.1487,	Adjusted R-squared:  0.139 
F-statistic: 15.37 on 2 and 176 DF,  p-value: 7.043e-07


Call:
lm(formula = equation, data = data_col, subset = repository_and_site == 
    site_name)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.19328 -0.07797  0.01107  0.05633  0.22494 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept)  0.0239951  0.2435459   0.099    0.922
age         -0.0003786  0.0007334  -0.516    0.607
snr_total    0.0223824  0.0146260   1.530    0.131

Residual standard error: 0.09817 on 64 degrees of freedom
Multiple R-squared:  0.0369,	Adjusted R-squared:  0.006805 
F-statistic: 1.226 on 2 and 64 DF,  p-value: 0.3002


Call:
lm(formula = equation, data = data_col, subset = repository_and_site == 
    site_name)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.61705 -0.04036  0.01048  0.05118  0.15027 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.3700490  0.0614341   6.024 3.36e-09 ***
age         0.0009719  0.0004075   2.385   0.0175 *  
snr_total   0.0228540  0.0049226   4.643 4.43e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.07574 on 488 degrees of freedom
Multiple R-squared:  0.044,	Adjusted R-squared:  0.04008 
F-statistic: 11.23 on 2 and 488 DF,  p-value: 1.705e-05

[1] "#######################      ICC      ################################"
[1] "IXI|Guys-IXI|HH,ndegree,489,diff,-0.000431,t-value,-19.559881,t-critical-value,1.963785,p-value,1.840020e-66,cohen_d,-1.834016"
[1] "IXI|Guys-IXI|IOP,ndegree,377,diff,-0.000101,t-value,-2.705734,t-critical-value,1.963785,p-value,1.400439e-02,cohen_d,-0.364326"
[1] "IXI|Guys-OASIS3,ndegree,801,diff,-0.001232,t-value,-66.994856,t-critical-value,1.963785,p-value,2.375112e-286,cohen_d,-4.850441"
[1] "IXI|HH-IXI|IOP,ndegree,244,diff,0.000330,t-value,5.436983,t-critical-value,1.966650,p-value,2.016726e-07,cohen_d,0.778685"
data frame with 0 columns and 0 rows

Call:
lm(formula = equation, data = data_col, subset = repository_and_site == 
    site_name)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.221405 -0.017658  0.005675  0.025261  0.085181 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.8014136  0.0395384  20.269   <2e-16 ***
age         -0.0003206  0.0001404  -2.283   0.0231 *  
snr_total    0.0026101  0.0023896   1.092   0.2756    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.03511 on 309 degrees of freedom
Multiple R-squared:  0.03455,	Adjusted R-squared:  0.0283 
F-statistic:  5.53 on 2 and 309 DF,  p-value: 0.00437


Call:
lm(formula = equation, data = data_col, subset = repository_and_site == 
    site_name)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.63321 -0.02411  0.00857  0.03642  0.11312 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.3557039  0.0967595   3.676 0.000314 ***
age         0.0001105  0.0003426   0.323 0.747392    
snr_total   0.0255363  0.0053205   4.800 3.38e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.07013 on 176 degrees of freedom
Multiple R-squared:  0.1275,	Adjusted R-squared:  0.1176 
F-statistic: 12.86 on 2 and 176 DF,  p-value: 6.138e-06


Call:
lm(formula = equation, data = data_col, subset = repository_and_site == 
    site_name)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.198372 -0.049290  0.006181  0.041516  0.157927 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept)  0.2974941  0.1957221   1.520    0.133
age         -0.0002195  0.0005894  -0.372    0.711
snr_total    0.0183113  0.0117540   1.558    0.124

Residual standard error: 0.07889 on 64 degrees of freedom
Multiple R-squared:  0.03703,	Adjusted R-squared:  0.006933 
F-statistic:  1.23 on 2 and 64 DF,  p-value: 0.299


Call:
lm(formula = equation, data = data_col, subset = repository_and_site == 
    site_name)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.70288 -0.02337  0.00782  0.03242  0.10325 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.5569727  0.0459679  12.117  < 2e-16 ***
age         0.0009114  0.0003049   2.989  0.00294 ** 
snr_total   0.0191092  0.0036833   5.188 3.12e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.05667 on 488 degrees of freedom
Multiple R-squared:  0.05589,	Adjusted R-squared:  0.05202 
F-statistic: 14.45 on 2 and 488 DF,  p-value: 8.039e-07