Analysis of the Error per Age Bin¶

import os
import math
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path

path_files_test = os.path.join('..', '..', '..', 'resources')

all_data = []
for path in Path(path_files_test).glob("*test.csv"):
    if "gm_wm" in path.name:
        continue
    df_mod = pd.read_csv(path)
    df_mod["modality"] = "+".join(path.name.split("_")[:-1]).upper()
    all_data.append(df_mod)
df = pd.concat(all_data)
df.head(4)

	subjectID	gender	age	y_hat	modality
0	sub-474	1	34.01	40.339008	DF+GM
1	sub-292	1	23.73	31.652921	DF+GM
2	sub-382	1	65.91	46.452941	DF+GM
3	sub-434	1	67.24	56.620143	DF+GM

colors = ["#1E88E5", "#FFC107", "#004D40","#909842", "#AB1111"]
sns.scatterplot(x="age", y="y_hat", hue="modality", data=df, 
            palette=colors)
plt.plot([20, 85], [20, 85], 'k--')
plt.xlabel("Chronological Age [years]")
plt.ylabel("Predicted Age [years]")
plt.legend(ncol=5, bbox_to_anchor=(1, 1.15))
plt.savefig(f"predicted_age_vs_age.png")

df["age_bin"] = df["age"].round(1).apply(lambda x: math.floor(x/10)*10)#.groupby("age")
df["age_bin"] = df["age_bin"] - (df["age_bin"]==80).astype(int)*10

df_plot_metrics = df.groupby(["age_bin", "modality"]).apply(lambda x: pd.Series([x["age"].mean(), x["y_hat"].mean(), x["y_hat"].std()],
                                                                      index=["age_mean", "y_hat_mean", "y_hat_std"])).reset_index()


for i, mod in enumerate(["DF", "GM", "WM", "CSF", "DF+GM"]):
    df_mod = df_plot_metrics.loc[df_plot_metrics.modality==mod]
    plt.errorbar(df_mod["age_mean"], df_mod["y_hat_mean"], yerr=df_mod["y_hat_std"], 
                 label=mod, color=colors[i])
plt.plot([df_plot_metrics["age_mean"].min(), df_plot_metrics["age_mean"].max()], [df_plot_metrics["age_mean"].min(), 
                                                                                  df_plot_metrics["age_mean"].max()], 'k--')
plt.xlabel("Chronological Age [years]")
plt.ylabel("Predicted Age [years]")
plt.legend(ncol=5, bbox_to_anchor=(1, 1.15))

<matplotlib.legend.Legend at 0x7fc532eded00>

df_groups = df.groupby(["modality", "age_bin"]).mean()[["age", "y_hat"]].round(2).reset_index()

df_groups["Difference [years]"] = (df_groups["age"] - df_groups["y_hat"]).round(2)
df_groups.rename(columns={"y_hat": "Mean predictions [years]", "age": "Mean age [years]",
                          "age_bin": "Age bin"}, inplace=True)

for el in df_groups.modality.unique():
    print(f"\n##########################            {el}            ##########################\n")
    print(df_groups.loc[df_groups.modality==el].loc[:, df_groups.columns!="modality"])
    print("\n")

##########################            CSF            ##########################

   Age bin  Mean age [years]  Mean predictions [years]  Difference [years]
     20             26.09                     39.43              -13.34
     30             34.56                     42.96               -8.40
     40             45.74                     49.52               -3.78
     50             56.12                     64.64               -8.52
     60             64.79                     63.75                1.04
     70             75.19                     75.59               -0.40



##########################            DF            ##########################

    Age bin  Mean age [years]  Mean predictions [years]  Difference [years]
      20             26.09                     35.23               -9.14
      30             34.56                     39.52               -4.96
      40             45.74                     47.86               -2.12
      50             56.12                     64.74               -8.62
     60             64.79                     60.42                4.37
     70             75.19                     71.20                3.99



##########################            DF+GM            ##########################

    Age bin  Mean age [years]  Mean predictions [years]  Difference [years]
     20             26.09                     34.39               -8.30
     30             34.56                     38.62               -4.06
     40             45.74                     47.17               -1.43
     50             56.12                     64.06               -7.94
     60             64.79                     59.82                4.97
     70             75.19                     71.77                3.42



##########################            GM            ##########################

    Age bin  Mean age [years]  Mean predictions [years]  Difference [years]
     20             26.09                     36.38              -10.29
     30             34.56                     39.66               -5.10
     40             45.74                     46.70               -0.96
     50             56.12                     60.09               -3.97
     60             64.79                     56.84                7.95
     70             75.19                     67.77                7.42



##########################            WM            ##########################

    Age bin  Mean age [years]  Mean predictions [years]  Difference [years]
     20             26.09                     31.78               -5.69
     30             34.56                     31.93                2.63
     40             45.74                     40.12                5.62
     50             56.12                     43.59               12.53
     60             64.79                     47.60               17.19
     70             75.19                     51.07               24.12

Brain Age and Deformation Fields

Analysis of the Error per Age Bin

Analysis of the Error per Age Bin¶