## Reproducibility in paediatric versus early adults’ groups - Statistics

### Reproducibility metrics analysis

In [1]:
suppressWarnings(suppressMessages(library("car")))
suppressWarnings(suppressMessages(library("multcomp")))
suppressWarnings(suppressMessages(library("sjstats")))

library(car)
library(multcomp)
library(sjstats)
library(xtable)

path_resources <- file.path(getwd(), "..", "..", "resources", "data")

path_col <- file.path(path_resources, "r2_adult_ABIDE_I_a2009s.csv")
data_col <- read.csv(path_col)
data_col$site <- as.factor(data_col$site)

map_aov <- list("r_square"=aov(r_square ~ site + age + adult + snr_total, data = data_col), 
                "ICC"=aov(ICC ~ site + age + adult + snr_total, data = data_col))


for (rep_measure in c("r_square", "ICC")){
  
  print(sprintf("## #####################      %s      ################################", rep_measure))  
  
  my_anova <- map_aov[[rep_measure]]
  print(Anova(my_anova, type = "III"))
  
  latex_table_name = sprintf("ABIDEI_a2009s_%s_first_level.txt", rep_measure)
  df = anova_stats(my_anova)[, c("df", "sumsq", "statistic", "p.value", "cohens.f")]
  print(xtable(df, caption = sprintf("ABIDEI III %s", rep_measure)), file=latex_table_name)
    
}

[1] "## #####################      r_square      ################################"
Anova Table (Type III tests)

Response: r_square
            Sum Sq  Df F value    Pr(>F)    
(Intercept) 0.0150   1  1.2833   0.25778    
site        1.7094  16  9.1392 < 2.2e-16 ***
age         0.2516   1 21.5223 4.383e-06 ***
adult       0.0753   1  6.4400   0.01143 *  
snr_total   0.4672   1 39.9653 5.377e-10 ***
Residuals   6.3829 546                      
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
[1] "## #####################      ICC      ################################"
Anova Table (Type III tests)

Response: ICC
            Sum Sq  Df F value    Pr(>F)    
(Intercept) 0.2516   1 33.3265 1.308e-08 ***
site        1.0323  16  8.5451 < 2.2e-16 ***
age         0.1138   1 15.0705 0.0001162 ***
adult       0.0369   1  4.8849 0.0275072 *  
snr_total   0.2862   1 37.9030 1.441e-09 ***
Residuals   4.1223 546                      
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0

### Analysis of age effect on reproducibility metrics

In [2]:
for (rep_measure in c("r_square", "ICC")){
  
  print(sprintf("#######################      %s      ################################", rep_measure))  
  equation = sprintf("%s ~  age + snr_total", rep_measure)
  var_name = "age"
    
  adult = "ADULT"
  children = "PAEDIATRIC"
  
  n1 =  sum(data_col$adult == children)
  n2 = sum(data_col$adult == adult)
  
  degrees_freedom = n1+n1-2
  
  lmsite1 <- lm(data=data_col, equation, subset = adult==children)
  lmsite2 <- lm(data=data_col, equation, subset = adult==adult)
      
  
  for (var_name in c("age", "snr_total")){

      degrees_freedom = n1+n1-2

      m1 = summary(lmsite1)[["coefficients"]][var_name,"Estimate"]
      m2 = summary(lmsite2)[["coefficients"]][var_name,"Estimate"]

      s1 =  summary(lmsite1)[["coefficients"]][var_name, "Std. Error"] 
      s2 =  summary(lmsite2)[["coefficients"]][var_name, "Std. Error"] 

      mean_diff = m1 - m2

      sp = (((n1-1)*(s1**2) + (n2-1)*(s2**2))/(n1+n2-2))**0.5
      cohen_d <- mean_diff/sp

      dem_samples = (1/n1+1/n2)**0.5
      t = mean_diff/(sp*dem_samples)

      p_value <- pt(-abs(t),degrees_freedom)

      t_critical_value <- qt(1-.05/2, degrees_freedom)

      print(sprintf("\n----------- %s  Slope: Independent t-test -----------\n", var_name))  
      print(sprintf("%s-%s -> ndegree: %d diff: %f t-value: %f, t-critical-value:%f, p-value: %e", "FS", "CAT12", n1+n2-2, mean_diff, t, t_critical_value, p_value))
      cat("Cohen's d effect %d", cohen_d)
      cat("\n\n")
      }
}

[1] "#######################      r_square      ################################"
[1] "\n----------- age  Slope: Independent t-test -----------\n"
[1] "FS-CAT12 -> ndegree: 564 diff: 0.004801 t-value: 25.963468, t-critical-value:1.963066, p-value: 2.079170e-107"
Cohen's d effect %d 2.336521

[1] "\n----------- snr_total  Slope: Independent t-test -----------\n"
[1] "FS-CAT12 -> ndegree: 564 diff: 0.002100 t-value: 7.687089, t-critical-value:1.963066, p-value: 2.309082e-14"
Cohen's d effect %d 0.6917813

[1] "#######################      ICC      ################################"
[1] "\n----------- age  Slope: Independent t-test -----------\n"
[1] "FS-CAT12 -> ndegree: 564 diff: 0.003609 t-value: 23.723423, t-critical-value:1.963066, p-value: 5.235104e-94"
Cohen's d effect %d 2.134933

[1] "\n----------- snr_total  Slope: Independent t-test -----------\n"
[1] "FS-CAT12 -> ndegree: 564 diff: 0.001595 t-value: 7.131709, t-critical-value:1.963066, p-value: 1.148725e-12"
Cohen's d effect %d

In [3]:
summary(lmsite1)[["coefficients"]]

Unnamed: 0,Estimate,Std. Error,t value,Pr(>|t|)
(Intercept),0.495822903,0.035526955,13.956245,4.8100799999999996e-36
age,0.008899685,0.002015716,4.415149,1.316529e-05
snr_total,0.008866917,0.002741516,3.234311,0.001325814
