added comparison without ökologie

2026-02-28 16:12:09 +01:00
parent 9e50db2da6
commit b1039e6a7f
48 changed files with 977 additions and 2 deletions
--- a/Data/plots/A1_trajectory.png
+++ b/Data/plots/A1_trajectory.png
--- a/Data/plots/A2_trajectory_by_medium.png
+++ b/Data/plots/A2_trajectory_by_medium.png
--- a/Data/plots/A3_trajectory_by_topic.png
+++ b/Data/plots/A3_trajectory_by_topic.png
--- a/Data/plots/A4_heatmap.png
+++ b/Data/plots/A4_heatmap.png
--- a/Data/plots/B1_tutoring_slopes_by_medium.png
+++ b/Data/plots/B1_tutoring_slopes_by_medium.png
--- a/Data/plots/B2_tutoring_slopes_by_topic.png
+++ b/Data/plots/B2_tutoring_slopes_by_topic.png
--- a/Data/plots/B3_tutoring_gain_by_medium.png
+++ b/Data/plots/B3_tutoring_gain_by_medium.png
--- a/Data/plots/B4_tutoring_medium_topic.png
+++ b/Data/plots/B4_tutoring_medium_topic.png
--- a/Data/plots/B5_tutoring_dashboard.png
+++ b/Data/plots/B5_tutoring_dashboard.png
--- a/Data/plots/C1_start_to_finish.png
+++ b/Data/plots/C1_start_to_finish.png
--- a/Data/plots/C2_learning_gains.png
+++ b/Data/plots/C2_learning_gains.png
--- a/Data/plots/D1_confidence_vs_score.png
+++ b/Data/plots/D1_confidence_vs_score.png
--- a/Data/plots/D2_delta_conf_vs_score.png
+++ b/Data/plots/D2_delta_conf_vs_score.png
--- a/Data/plots/D3_calibration.png
+++ b/Data/plots/D3_calibration.png
--- a/Data/plots/E1_personality_correlations.png
+++ b/Data/plots/E1_personality_correlations.png
--- a/Data/plots/E2_trait_vs_score_gain.png
+++ b/Data/plots/E2_trait_vs_score_gain.png
--- a/Data/plots_effects/F1_cohens_d_comparison.png
+++ b/Data/plots_effects/F1_cohens_d_comparison.png
--- a/Data/plots_effects/F2_mean_gain_comparison.png
+++ b/Data/plots_effects/F2_mean_gain_comparison.png
--- a/Data/plots_effects/F3_paired_slopes_comparison.png
+++ b/Data/plots_effects/F3_paired_slopes_comparison.png
--- a/Data/plots_effects/F4_gain_distribution_comparison.png
+++ b/Data/plots_effects/F4_gain_distribution_comparison.png
--- a/Data/plots_effects/F5_stats_table.png
+++ b/Data/plots_effects/F5_stats_table.png
--- a/Data/plots_effects/G1_effect_per_topic.png
+++ b/Data/plots_effects/G1_effect_per_topic.png
--- a/Data/plots_effects/G2_slopes_per_topic.png
+++ b/Data/plots_effects/G2_slopes_per_topic.png
--- a/Data/plots_effects/H1_medium_topic_grid.png
+++ b/Data/plots_effects/H1_medium_topic_grid.png
--- a/Data/plots_effects/I1_outlier_scatter.png
+++ b/Data/plots_effects/I1_outlier_scatter.png
--- a/Data/plots_effects/I2_outlier_effect_comparison.png
+++ b/Data/plots_effects/I2_outlier_effect_comparison.png
--- a/Data/plots_effects/I3_outlier_heatmap.png
+++ b/Data/plots_effects/I3_outlier_heatmap.png
--- a/Data/plots_questionnaires/Q01_imi_by_medium.png
+++ b/Data/plots_questionnaires/Q01_imi_by_medium.png
--- a/Data/plots_questionnaires/Q02_sus_by_medium.png
+++ b/Data/plots_questionnaires/Q02_sus_by_medium.png
--- a/Data/plots_questionnaires/Q03_ueqs_by_medium.png
+++ b/Data/plots_questionnaires/Q03_ueqs_by_medium.png
--- a/Data/plots_questionnaires/Q04_nasatlx_by_medium.png
+++ b/Data/plots_questionnaires/Q04_nasatlx_by_medium.png
--- a/Data/plots_questionnaires/Q05_nasatlx_comparison.png
+++ b/Data/plots_questionnaires/Q05_nasatlx_comparison.png
--- a/Data/plots_questionnaires/Q06_godspeed_by_medium.png
+++ b/Data/plots_questionnaires/Q06_godspeed_by_medium.png
--- a/Data/plots_questionnaires/Q07_social_presence_by_medium.png
+++ b/Data/plots_questionnaires/Q07_social_presence_by_medium.png
--- a/Data/plots_questionnaires/Q08_cybersickness_by_medium.png
+++ b/Data/plots_questionnaires/Q08_cybersickness_by_medium.png
--- a/Data/plots_questionnaires/Q09_pre_session_states.png
+++ b/Data/plots_questionnaires/Q09_pre_session_states.png
--- a/Data/plots_questionnaires/Q10_additional_measures.png
+++ b/Data/plots_questionnaires/Q10_additional_measures.png
--- a/Data/plots_questionnaires/Q11_correlation_heatmap.png
+++ b/Data/plots_questionnaires/Q11_correlation_heatmap.png
--- a/Data/plots_questionnaires/Q12_phase_comparison_dashboard.png
+++ b/Data/plots_questionnaires/Q12_phase_comparison_dashboard.png
--- a/Data/plots_questionnaires/Q13_vr_specific.png
+++ b/Data/plots_questionnaires/Q13_vr_specific.png
--- a/Data/stats/effects_by_medium_topic_grid.csv
+++ b/Data/stats/effects_by_medium_topic_grid.csv
@ -0,0 +1,10 @@
+Medium,Topic,N,Mean_Gain,SD_Gain,Cohens_d,t_stat,p_value
+Chat,Mendel,6,14.467,11.5,1.258,-3.082,0.0274
+Chat,DNA-Replikation,6,22.217,20.952,1.06,-2.597,0.0484
+Chat,Ökologie,6,-3.35,3.67,-0.913,2.236,0.0756
+Video,Mendel,6,4.417,3.421,1.291,-3.162,0.025
+Video,DNA-Replikation,6,16.667,32.092,0.519,-1.272,0.2593
+Video,Ökologie,6,-0.017,8.443,-0.002,0.005,0.9963
+VR,Mendel,6,14.45,12.959,1.115,-2.731,0.0412
+VR,DNA-Replikation,6,11.117,13.134,0.846,-2.073,0.0929
+VR,Ökologie,6,15.55,36.21,0.429,-1.052,0.341
--- a/Data/stats/effects_by_medium_with_without_oekologie.csv
+++ b/Data/stats/effects_by_medium_with_without_oekologie.csv
@ -0,0 +1,7 @@
+Medium,Condition,N,Mean_Gain,SD_Gain,Cohens_d,t_stat,p_value
+Chat,All Topics,18,11.111,17.125,0.649,-2.753,0.0136
+Chat,Excl_Oekologie,12,18.342,16.614,1.104,-3.824,0.0028
+Video,All Topics,18,7.022,19.494,0.36,-1.528,0.1448
+Video,Excl_Oekologie,12,10.542,22.68,0.465,-1.61,0.1357
+VR,All Topics,18,13.706,22.125,0.619,-2.628,0.0176
+VR,Excl_Oekologie,12,12.783,12.561,1.018,-3.525,0.0048
--- a/Data/stats/effects_by_topic.csv
+++ b/Data/stats/effects_by_topic.csv
@ -0,0 +1,4 @@
+Topic,N,Mean_Gain,SD_Gain,Cohens_d,t_stat,p_value
+Mendel,18,11.111,10.745,1.034,-4.387,0.0004
+DNA-Replikation,18,16.667,22.461,0.742,-3.148,0.0059
+Ökologie,18,4.061,21.964,0.185,-0.784,0.4436
--- a/Data/stats/outlier_influence.csv
+++ b/Data/stats/outlier_influence.csv
@ -0,0 +1,4 @@
+Medium,N_all,Mean_Gain_all,Cohens_d_all,p_all,N_no_outliers,Mean_Gain_no_outliers,Cohens_d_no_outliers,p_no_outliers,Delta_d,Outliers
+Chat,18,11.111,0.649,0.0136,17,8.235,0.665,0.0145,0.016,P8/DNA-Replikation(+60.0%)
+Video,18,7.022,0.36,0.1448,16,1.225,0.166,0.5178,-0.195,P3/DNA-Replikation(+33.4%); P10/DNA-Replikation(+73.4%)
+VR,18,13.706,0.619,0.0176,17,9.412,0.727,0.0085,0.108,P3/Ökologie(+86.7%)
--- a/README.md
+++ b/README.md
@ -286,6 +286,101 @@ Scatter plots of each Big Five trait against tutoring score gain, with regressio

 ---

+## G. Effect Analysis
+
+> Generated by `generate_plots_effects.py` → `Data/plots_effects/`
+> Statistical exports → `Data/stats/effects_*.csv`, `Data/stats/outlier_influence.csv`
+
+---
+
+### G-F. Effect Without Ökologie (vs. With)
+
+Ökologie has markedly higher pre-tutoring baselines (ceiling effects), which compresses gains for that topic. This section quantifies how much those ceiling effects suppress the observed effect sizes, and presents a full side-by-side comparison of all mediums with and without Ökologie included.
+
+#### GF1 – Cohen's d Comparison by Medium
+
+![Cohen's d comparison](Data/plots_effects/F1_cohens_d_comparison.png)
+
+Grouped bar chart of Cohen's d per medium under two conditions: All Topics and Excl. Ökologie. Each bar is annotated with the raw mean gain and significance stars. Reference lines mark the conventional small (0.2), medium (0.5), and large (0.8) effect size thresholds. Removing Ökologie raises the effect size for every medium (Chat 0.65 → 1.10, Video 0.36 → 0.47, VR 0.62 → 1.02), with Chat showing the largest absolute shift and Video the smallest.
+
+#### GF2 – Mean Score Gain Comparison
+
+![Mean score gain comparison](Data/plots_effects/F2_mean_gain_comparison.png)
+
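+Bar chart of the raw mean tutoring score gain per medium with 95% CI error bars, the two conditions drawn side by side. Shows the absolute gain shift when Ökologie is excluded. Chat benefits most from exclusion (+11.1 → +18.3 pp) and Video gains moderately (+7.0 → +10.5 pp), indicating that their Ökologie sessions were the ceiling-affected ones. VR's mean gain changes least and even drops slightly (+13.7 → +12.8 pp), because VR × Ökologie has a high but very noisy mean gain (+15.6 pp, SD 36.2) that is driven largely by a single outlier (P3, +86.7 pp).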
+
+#### GF3 – Paired Slopes: All Topics vs. Excl. Ökologie
+
+![Paired slopes comparison](Data/plots_effects/F3_paired_slopes_comparison.png)
+
+A 2×3 grid (rows: All Topics / Excl. Ökologie; columns: Chat / Video / VR). Each panel shows individual Pre→Post-Tutoring lines colored by topic, the medium mean trajectory (thick diamond marker), and annotated t-test / Cohen's d / p-value. The bottom row directly reveals the cleaner separation in trajectories once the near-zero Ökologie gains are removed.
+
+#### GF4 – Gain Distribution Comparison
+
+![Gain distribution comparison](Data/plots_effects/F4_gain_distribution_comparison.png)
+
+Side-by-side violin + box plots per medium, two per medium (All Topics / Excl. Ökologie). Shows the shift in median, spread, and the location of extreme values. For VR in particular, removing Ökologie tightens the distribution and raises the median, confirming Ökologie's pull toward zero.
+
+#### GF5 – Descriptive Statistics Table
+
+![Stats table](Data/plots_effects/F5_stats_table.png)
+
+Rendered table summarizing N, mean gain, SD, Cohen's d, t-statistic, and p-value for all 6 conditions (3 mediums × 2 topic sets) in one view.
+
+---
+
+### G-G. Effect Per Topic
+
+Full effect-size breakdown for each of the three topics independently, across all mediums combined.
+
+#### GG1 – Effect Per Topic (Gain + Cohen's d)
+
+![Effect per topic](Data/plots_effects/G1_effect_per_topic.png)
+
+Left panel: mean tutoring score gain with 95% CI error bars per topic, annotated with N and significance. Right panel: Cohen's d per topic with threshold reference lines. DNA-Replikation yields the largest raw gain (+16.7 pp; high starting deficit → large gain) but with high variance, so Mendel has the largest standardized effect (d = 1.03 vs. 0.74 for DNA-Replikation). Ökologie is smallest on both measures (+4.1 pp, d = 0.19, not significant; ceiling effects).
+
+#### GG2 – Paired Slopes per Topic
+
+![Paired slopes per topic](Data/plots_effects/G2_slopes_per_topic.png)
+
+Three-panel slope plot (one per topic), with lines colored by medium. Medium mean trajectories are drawn as thick diamond markers and labeled with per-medium gains. The overall t-test / d / p annotation summarizes the within-topic effect. Ökologie clearly shows compressed trajectories compared to DNA-Replikation.
+
+---
+
+### G-H. All Medium × Topic Combinations
+
+#### GH1 – 3×3 Slope Grid (Medium × Topic)
+
+![3x3 slope grid](Data/plots_effects/H1_medium_topic_grid.png)
+
+A 3×3 grid with rows = mediums (Chat, Video, VR) and columns = topics (Mendel, DNA-Replikation, Ökologie). Each of the 9 cells shows individual participant Pre→Post-Tutoring slope lines (colored by topic), the medium mean (thick line), and the annotated effect size (d, p, n). This is the most granular view: Chat × DNA-Replikation shows the largest mean gain (+22.2 pp), while Chat × Ökologie and Video × Ökologie show compressed, near-zero or slightly negative gains. VR × Ökologie has a higher mean gain (+15.6 pp), but it is not significant (p = 0.34) and is driven largely by a single outlier (P3, +86.7 pp).
+
+---
+
+### G-I. Outlier Influence Analysis
+
+Outliers are defined using the 1.5×IQR rule applied per medium on tutoring `Score_Gain`.
+
+#### GI1 – Score Gain Scatter with Outlier Flags
+
+![Outlier scatter](Data/plots_effects/I1_outlier_scatter.png)
+
+Jittered scatter of individual score gains per medium. IQR fence lines (Q1 − 1.5×IQR and Q3 + 1.5×IQR) are marked in red. Outlier points are highlighted in red and labeled with participant ID and topic name. P3/Ökologie (VR, +86.7 pp) is the most extreme single data point.
+
+#### GI2 – Outlier Influence on Effect Sizes
+
+![Outlier effect comparison](Data/plots_effects/I2_outlier_effect_comparison.png)
+
+Left: grouped bar chart of Cohen's d with All Data vs. Outliers Removed, annotated with raw gains and significance. Right: Δd bar chart showing the change in effect size after outlier removal per medium. A positive Δd means the outlier(s) were suppressing the true effect; a negative Δd means they were inflating it.
+
+#### GI3 – Outlier Heatmap (Participant × Topic per Medium)
+
+![Outlier heatmap](Data/plots_effects/I3_outlier_heatmap.png)
+
+Heatmap of tutoring score gain for each participant × topic cell, one panel per medium. Color encodes gain magnitude (red–yellow–green). Cells with a red border are IQR outliers within that medium's distribution. Allows immediate identification of which participant-topic combinations drive extreme results.
+
+---
+
 ## F. Questionnaire Analysis

 > Questionnaires were administered at multiple phases: Pre-Reading, Post-Reading, Pre-Tutoring, and Post-Tutoring.
--- a/generate_plots.py
+++ b/generate_plots.py
@ -30,7 +30,7 @@ from scipy import stats
 # =============================================================================
 # CONFIG
 # =============================================================================
-BASE = Path(r"F:\GitHub Projekte\VirTu-Eval\Data")
+BASE = Path(__file__).resolve().parent / "Data"
 PLOT_DIR = BASE / "plots"
 PLOT_DIR.mkdir(exist_ok=True)
 STATS_DIR = BASE / "stats"
--- a/generate_plots_effects.py
+++ b/generate_plots_effects.py
@ -0,0 +1,855 @@
+"""
+generate_plots_effects.py
+
+Effect-focused analysis for VirTu-Eval experiment data.
+Generates plots into Data/plots_effects/ organized by section:
+
+  F. Effect Without Ökologie (vs. With)  – 5 plots
+  G. Effect Per Topic                    – 2 plots
+  H. All Medium × Topic Combinations     – 1 plot (3×3 grid)
+  I. Outlier Influence Analysis          – 3 plots
+
+Usage:
+  python generate_plots_effects.py
+"""
+
+import pandas as pd
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib.lines import Line2D
+import seaborn as sns
+from pathlib import Path
+from scipy import stats
+
+# =============================================================================
+# CONFIG
+# =============================================================================
+# Resolve the data directory relative to this script (the same way
+# generate_plots.py does) so the script does not depend on the current
+# working directory.
+BASE = Path(__file__).resolve().parent / "Data"
+PLOT_DIR = BASE / "plots_effects"
+PLOT_DIR.mkdir(parents=True, exist_ok=True)
+STATS_DIR = BASE / "stats"
+STATS_DIR.mkdir(exist_ok=True)
+
+# Measurement phases in chronological order, plus short labels for axes.
+PHASE_ORDER  = ['Pre-Reading', 'Post-Reading', 'Pre-Tutoring', 'Post-Tutoring']
+PHASE_LABELS = ['Pre-Read', 'Post-Read', 'Pre-Tutor', 'Post-Tutor']
+PHASE_SHORT  = dict(zip(PHASE_ORDER, PHASE_LABELS))
+
+# Fixed display order and color per tutoring medium.
+MEDIUM_ORDER  = ['Chat', 'Video', 'VR']
+MEDIUM_COLORS = {'Chat': '#2196F3', 'Video': '#FF9800', 'VR': '#4CAF50'}
+
+# Fixed display order and color per topic.
+TOPIC_ORDER  = ['Mendel', 'DNA-Replikation', 'Ökologie']
+TOPIC_COLORS = {'Mendel': '#E91E63', 'DNA-Replikation': '#9C27B0', 'Ökologie': '#009688'}
+
+# Topic subset used for every "Excl. Ökologie" condition.
+TOPICS_NO_OEK = ['Mendel', 'DNA-Replikation']
+
+# Global plot styling applied to every figure produced by this script.
+sns.set_theme(style="whitegrid", font_scale=1.05)
+plt.rcParams['figure.dpi'] = 150
+plt.rcParams['savefig.bbox'] = 'tight'
+
+
+# =============================================================================
+# HELPERS
+# =============================================================================
+
+def cohens_d(pre, post):
+    """Return the paired-samples Cohen's d: mean(post - pre) / SD(post - pre).
+
+    The standard deviation uses ddof=1. Returns 0.0 whenever that standard
+    deviation is not strictly positive (all differences equal, or the value
+    is NaN because fewer than two pairs were given).
+    """
+    delta = post - pre
+    spread = delta.std(ddof=1)
+    if spread > 0:
+        return delta.mean() / spread
+    return 0.0
+
+
+def sig_stars(p):
+    """Translate a p-value into significance stars.
+
+    Returns '***' for p < 0.001, '**' for p < 0.01, '*' for p < 0.05 and
+    'n.s.' otherwise (which includes NaN, since every comparison is False).
+    """
+    for cutoff, stars in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
+        if p < cutoff:
+            return stars
+    return 'n.s.'
+
+
+def compute_effect(sub):
+    """Return (mean_gain, sd_gain, sem_gain, d, t, p, n) for a paired subset.
+
+    `sub` must provide the columns 'Pre_Score', 'Post_Score' and
+    'Score_Gain'. With fewer than two rows only the mean gain, the (then
+    undefined) SD and n are returned; SEM, d, t and p are NaN.
+
+    The paired t-test is computed as post vs. pre, so a positive t means an
+    improvement and carries the same sign as Cohen's d and the mean gain.
+    The p-value does not depend on the argument order.
+    """
+    gain = sub['Score_Gain']
+    n = len(sub)
+    if n < 2:
+        return gain.mean(), gain.std(ddof=1), np.nan, np.nan, np.nan, np.nan, n
+    pre  = sub['Pre_Score']
+    post = sub['Post_Score']
+    # post first: keeps the sign of t consistent with d = mean(post - pre) / SD
+    t, p = stats.ttest_rel(post, pre)
+    d    = cohens_d(pre, post)
+    return gain.mean(), gain.std(ddof=1), gain.sem(), d, t, p, n
+
+
+def iqr_outlier_mask(series):
+    """Flag IQR outliers in `series` (1.5×IQR rule).
+
+    Returns a boolean Series that is True where a value lies below
+    Q1 - 1.5×IQR or above Q3 + 1.5×IQR.
+    """
+    lower_q = series.quantile(0.25)
+    upper_q = series.quantile(0.75)
+    spread = upper_q - lower_q
+    too_low = series < lower_q - 1.5 * spread
+    too_high = series > upper_q + 1.5 * spread
+    return too_low | too_high
+
+
+# =============================================================================
+# DATA LOADING
+# =============================================================================
+
+def load_data():
+    """Read BASE / 'test_scores_all.csv' and add derived columns.
+
+    'Zeitpunkt' is stripped of surrounding whitespace and the label
+    'Pre-Tutor' is replaced by 'Pre-Tutoring'. 'Phase' is the same column as
+    an ordered categorical following PHASE_ORDER, and 'P_Num' is the first
+    run of digits found in 'Participant', as an integer.
+    """
+    scores = pd.read_csv(BASE / "test_scores_all.csv", encoding="utf-8-sig")
+
+    timepoint = scores['Zeitpunkt'].str.strip()
+    scores['Zeitpunkt'] = timepoint.replace('Pre-Tutor', 'Pre-Tutoring')
+    scores['Phase'] = pd.Categorical(scores['Zeitpunkt'],
+                                     categories=PHASE_ORDER, ordered=True)
+
+    participant_no = scores['Participant'].str.extract(r'(\d+)')
+    scores['P_Num'] = participant_no.astype(int)
+    return scores
+
+
+def build_paired_tutoring(df):
+    """Pair each Pre-Tutoring row with its Post-Tutoring counterpart.
+
+    Rows are matched on (Participant, Topic, Medium) with an inner merge, so
+    only combinations present in both phases survive. The result has the
+    columns Pre_Score, Pre_Conf, Post_Score, Post_Conf, the differences
+    Score_Gain and Conf_Gain (post minus pre), and P_Num (the first run of
+    digits in Participant, as an integer).
+    """
+    keys = ['Participant', 'Topic', 'Medium']
+    measures = ['Score_Pct', 'Avg_Confidence']
+
+    def phase_rows(phase, prefix):
+        # Select one phase and rename its measures to <prefix>_Score / _Conf.
+        rows = df.loc[df['Zeitpunkt'] == phase, keys + measures].copy()
+        rows.columns = keys + [f'{prefix}_Score', f'{prefix}_Conf']
+        return rows
+
+    before = phase_rows('Pre-Tutoring', 'Pre')
+    after = phase_rows('Post-Tutoring', 'Post')
+    paired = before.merge(after, on=keys)
+
+    paired['Score_Gain'] = paired['Post_Score'] - paired['Pre_Score']
+    paired['Conf_Gain']  = paired['Post_Conf']  - paired['Pre_Conf']
+    paired['P_Num'] = paired['Participant'].str.extract(r'(\d+)').astype(int)
+    return paired
+
+
+# =============================================================================
+# F. EFFECT WITHOUT ÖKOLOGIE (vs. WITH)
+# =============================================================================
+
+def plot_F1_cohens_d_comparison(paired):
+    """Bar chart: Cohen's d per medium – All Topics vs. Excl. Ökologie.
+
+    Draws two bars per medium (all rows, and only the topics listed in
+    TOPICS_NO_OEK), annotates each bar with d, the mean gain and significance
+    stars, adds reference lines at d = 0.2 / 0.5 / 0.8 and saves the figure
+    to PLOT_DIR / 'F1_cohens_d_comparison.png'.
+    """
+    fig, ax = plt.subplots(figsize=(10, 6))
+    centers = np.arange(len(MEDIUM_ORDER))
+    bar_w = 0.35
+    bar_colors = [MEDIUM_COLORS[m] for m in MEDIUM_ORDER]
+    variants = [
+        ('All Topics',     True,  ''),
+        ('Excl. Ökologie', False, '//'),
+    ]
+
+    for slot, (label, keep_all, hatch) in enumerate(variants):
+        d_vals, gains, p_vals = [], [], []
+        for medium in MEDIUM_ORDER:
+            rows = paired[paired['Medium'] == medium]
+            if not keep_all:
+                rows = rows[rows['Topic'].isin(TOPICS_NO_OEK)]
+            gain, _sd, _sem, d, _t, p, _n = compute_effect(rows)
+            # Undefined statistics (NaN) are drawn as d = 0 and treated as
+            # not significant (p = 1).
+            d_vals.append(0 if np.isnan(d) else d)
+            gains.append(gain)
+            p_vals.append(1 if np.isnan(p) else p)
+
+        bars = ax.bar(centers + slot*bar_w - bar_w/2, d_vals, bar_w, label=label,
+                      color=bar_colors,
+                      alpha=0.85 if slot == 0 else 0.45,
+                      hatch=hatch, edgecolor='white', linewidth=1.2)
+
+        for bar, gain, p, d_val in zip(bars, gains, p_vals, d_vals):
+            star = sig_stars(p)
+            ax.text(bar.get_x() + bar.get_width()/2,
+                    max(d_val, 0) + 0.04,
+                    f'd={d_val:.2f}\n{gain:+.1f}%\n{star}',
+                    ha='center', va='bottom', fontsize=8.5, fontweight='bold',
+                    color='#333333')
+
+    # Conventional effect-size thresholds: (level, label height, name, ls, alpha)
+    guides = [
+        (0.2, 0.21, 'small',  ':',  0.6),
+        (0.5, 0.51, 'medium', '--', 0.6),
+        (0.8, 0.81, 'large',  '-',  0.4),
+    ]
+    for level, _, _, line_style, line_alpha in guides:
+        ax.axhline(level, color='gray', lw=1, ls=line_style, alpha=line_alpha)
+    for _, label_y, name, _, _ in guides:
+        ax.text(2.65, label_y, name, fontsize=8, color='gray', va='bottom')
+
+    ax.set_xticks(centers)
+    ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
+    ax.set_ylabel("Cohen's d (tutoring score gain)", fontsize=12)
+    ax.set_ylim(bottom=0)
+    ax.legend(fontsize=11)
+    ax.set_title("F1 – Effect Sizes by Medium: All Topics vs. Excl. Ökologie",
+                 fontsize=13, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'F1_cohens_d_comparison.png')
+    plt.close(fig)
+
+
+def plot_F2_mean_gain_comparison(paired):
+    """Bar chart with 95% CI: mean score gain per medium – All vs. Excl. Ökologie.
+
+    Draws two bars per medium (all rows, and only the topics listed in
+    TOPICS_NO_OEK) with error bars and a label showing the mean gain, then
+    saves the figure to PLOT_DIR / 'F2_mean_gain_comparison.png'.
+
+    The error bars are t-based 95% confidence intervals
+    (SEM × t[0.975, n-1]); the normal quantile 1.96 would understate the
+    interval at the small group sizes used here.
+    """
+    fig, ax = plt.subplots(figsize=(10, 6))
+    x = np.arange(len(MEDIUM_ORDER))
+    w = 0.35
+
+    for j, (label, filter_fn, alpha, hatch) in enumerate([
+        ('All Topics',     lambda sub: sub,                                   0.80, ''),
+        ('Excl. Ökologie', lambda sub: sub[sub['Topic'].isin(TOPICS_NO_OEK)], 0.45, '//'),
+    ]):
+        means, cis = [], []
+        for m in MEDIUM_ORDER:
+            sub_f = filter_fn(paired[paired['Medium'] == m])
+            g, _sd, sem, _d, _t, _p, n = compute_effect(sub_f)
+            means.append(g)
+            # Student-t critical value for n-1 degrees of freedom; undefined
+            # (NaN, no error bar) when fewer than two pairs are available.
+            t_crit = stats.t.ppf(0.975, n - 1) if n >= 2 else np.nan
+            cis.append(sem * t_crit)
+
+        bars = ax.bar(x + j*w - w/2, means, w, label=label,
+                      color=[MEDIUM_COLORS[m] for m in MEDIUM_ORDER],
+                      alpha=alpha, hatch=hatch, edgecolor='white', linewidth=1.2,
+                      yerr=cis, capsize=5, error_kw=dict(lw=1.5, capthick=1.5))
+
+        for b, g in zip(bars, means):
+            ax.text(b.get_x() + b.get_width()/2,
+                    g + (b.get_height() * 0.05 if g >= 0 else -2),
+                    f'{g:+.1f}%',
+                    ha='center', va='bottom', fontsize=9, fontweight='bold',
+                    color='#333333')
+
+    ax.axhline(0, color='gray', lw=1)
+    ax.set_xticks(x)
+    ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
+    ax.set_ylabel('Mean Score Gain (%, 95% CI)', fontsize=12)
+    ax.legend(fontsize=11)
+    ax.set_title('F2 – Mean Score Gain by Medium: All Topics vs. Excl. Ökologie',
+                 fontsize=13, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'F2_mean_gain_comparison.png')
+    plt.close(fig)
+
+
+def plot_F3_paired_slopes_comparison(paired):
+    """2×3 grid: top row = All Topics, bottom row = Excl. Ökologie.
+
+    Columns follow MEDIUM_ORDER. Each panel draws one Pre→Post line per row
+    of `paired` (colored by topic, labeled with the participant) and, when
+    at least two rows exist, the medium mean as a thick diamond-marked line
+    plus a stats box from compute_effect. Saves the figure to
+    PLOT_DIR / 'F3_paired_slopes_comparison.png'.
+    """
+    subsets = [
+        ('All Topics',     paired),
+        ('Excl. Ökologie', paired[paired['Topic'].isin(TOPICS_NO_OEK)]),
+    ]
+
+    fig, axes = plt.subplots(2, 3, figsize=(18, 12), sharey=True)
+
+    for row, (cond_label, data) in enumerate(subsets):
+        for col, medium in enumerate(MEDIUM_ORDER):
+            ax = axes[row][col]
+            rows = data[data['Medium'] == medium].sort_values('P_Num')
+
+            # One thin line per participant/topic pair.
+            for who, topic, pre, post in zip(rows['Participant'], rows['Topic'],
+                                             rows['Pre_Score'], rows['Post_Score']):
+                ax.plot([0, 1], [pre, post],
+                        color=TOPIC_COLORS[topic], alpha=0.55, lw=1.5,
+                        marker='o', markersize=5)
+                ax.annotate(who, (1.02, post),
+                            fontsize=7, va='center', alpha=0.6)
+
+            if len(rows) >= 2:
+                # Medium mean trajectory drawn on top of the individual lines.
+                ax.plot([0, 1],
+                        [rows['Pre_Score'].mean(), rows['Post_Score'].mean()],
+                        color=MEDIUM_COLORS[medium], lw=4, marker='D',
+                        markersize=12, zorder=10,
+                        markeredgecolor='white', markeredgewidth=2)
+
+                g, sd, sem, d, t, p, n = compute_effect(rows)
+                star = sig_stars(p)
+                ax.text(0.5, 0.03,
+                        f'n={n}  Gain: {g:+.1f}%\nd={d:.2f}  t={t:.2f}  p={p:.3f} {star}',
+                        transform=ax.transAxes, ha='center', fontsize=9,
+                        bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))
+
+            ax.set_xticks([0, 1])
+            ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=10)
+            ax.set_ylim(-5, 110)
+
+            # Row label on the first column only, medium title on the top row only.
+            if col == 0:
+                ax.set_ylabel(f'{cond_label}\nTest Score (%)', fontsize=10, fontweight='bold')
+            if row == 0:
+                ax.set_title(medium, fontsize=13, fontweight='bold',
+                             color=MEDIUM_COLORS[medium])
+
+    legend_els = []
+    for topic in TOPIC_ORDER:
+        legend_els.append(Line2D([0],[0], color=TOPIC_COLORS[topic], lw=2,
+                                 marker='o', ms=6, label=topic))
+    legend_els.append(Line2D([0],[0], color='gray', lw=4, marker='D', ms=8, label='Medium Mean'))
+    fig.legend(handles=legend_els, loc='lower center', ncol=4, fontsize=10,
+               bbox_to_anchor=(0.5, 0.01))
+    fig.suptitle('F3 – Paired Slopes: All Topics (top) vs. Excl. Ökologie (bottom)',
+                 fontsize=14, fontweight='bold')
+    fig.tight_layout(rect=[0, 0.05, 1, 0.97])
+    fig.savefig(PLOT_DIR / 'F3_paired_slopes_comparison.png')
+    plt.close(fig)
+
+
+def plot_F4_gain_distribution_comparison(paired):
+    """Side-by-side violin+box plots per medium: All Topics vs. Excl. Ökologie.
+
+    One panel per medium, each with two distributions of Score_Gain (all
+    rows at x = 0.8, only TOPICS_NO_OEK at x = 2.2). A distribution with
+    fewer than two values is skipped. Each drawn distribution is annotated
+    with n, mean gain, Cohen's d and significance stars. Saves the figure to
+    PLOT_DIR / 'F4_gain_distribution_comparison.png'.
+    """
+    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
+    positions = [0.8, 2.2]
+    opacities = [0.75, 0.40]
+
+    for col, medium in enumerate(MEDIUM_ORDER):
+        ax = axes[col]
+        shade = MEDIUM_COLORS[medium]
+        frame_all = paired[paired['Medium'] == medium]
+        frame_noe = frame_all[frame_all['Topic'].isin(TOPICS_NO_OEK)]
+
+        for pos, frame, alpha in zip(positions, [frame_all, frame_noe], opacities):
+            gains = frame['Score_Gain'].values
+            if len(gains) < 2:
+                continue
+
+            violin = ax.violinplot(gains, positions=[pos], widths=0.9,
+                                   showmedians=False, showextrema=False)
+            for body in violin['bodies']:
+                body.set_facecolor(shade)
+                body.set_alpha(alpha)
+
+            ax.boxplot(gains, positions=[pos], widths=0.35,
+                       patch_artist=True, showmeans=True, notch=False,
+                       meanprops=dict(marker='D', markerfacecolor='black',
+                                      markeredgecolor='white', markersize=7),
+                       medianprops=dict(color='white', lw=2),
+                       boxprops=dict(facecolor=shade, alpha=alpha))
+
+            # Statistics for exactly the rows whose gains were just drawn.
+            g, sd, sem, d, t, p, n = compute_effect(frame)
+            star = sig_stars(p)
+            ax.text(pos, np.nanmax(gains) + 4,
+                    f'n={n}\nM={g:+.1f}%\nd={d:.2f} {star}',
+                    ha='center', va='bottom', fontsize=8.5, fontweight='bold')
+
+        ax.axhline(0, color='gray', lw=1, ls='--', alpha=0.6)
+        ax.set_xticks(positions)
+        ax.set_xticklabels(['All\nTopics', 'Excl.\nÖkologie'], fontsize=10)
+        ax.set_title(medium, fontsize=13, fontweight='bold', color=shade)
+        if col == 0:
+            ax.set_ylabel('Score Gain (%)', fontsize=12)
+
+    fig.suptitle('F4 – Gain Distributions: All Topics vs. Excl. Ökologie',
+                 fontsize=14, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'F4_gain_distribution_comparison.png')
+    plt.close(fig)
+
+
+def plot_F5_stats_table(paired):
+    """Rendered table: N, mean gain, SD, d, t, p for each medium × condition.
+
+    Builds one table row per medium and condition (all rows / only
+    TOPICS_NO_OEK) from compute_effect, styles the header and tints each
+    row by medium, and saves the figure to PLOT_DIR / 'F5_stats_table.png'.
+    Statistics that are NaN are shown as '–'.
+    """
+    fig, ax = plt.subplots(figsize=(14, 5))
+    ax.axis('off')
+
+    def cell(value):
+        # Three-decimal text, or a dash when the statistic is undefined.
+        return '–' if np.isnan(value) else f'{value:.3f}'
+
+    rows = []
+    for m in MEDIUM_ORDER:
+        of_medium = paired[paired['Medium'] == m]
+        subsets = [
+            ('All Topics',     of_medium),
+            ('Excl. Ökologie', of_medium[of_medium['Topic'].isin(TOPICS_NO_OEK)]),
+        ]
+        for cond_label, sub in subsets:
+            g, sd, sem, d, t, p, n = compute_effect(sub)
+            if np.isnan(p):
+                p_text = '–'
+            else:
+                star = sig_stars(p)
+                p_text = f'{p:.3f}{star}'
+            rows.append([
+                m, cond_label, str(n),
+                f'{g:+.2f}', f'{sd:.2f}',
+                cell(d), cell(t), p_text,
+            ])
+
+    col_labels = ['Medium', 'Condition', 'N', 'Mean Gain (%)', 'SD',
+                  "Cohen's d", 't-stat', 'p-value']
+    n_cols = len(col_labels)
+    table = ax.table(cellText=rows, colLabels=col_labels,
+                     loc='center', cellLoc='center')
+    table.auto_set_font_size(False)
+    table.set_fontsize(11)
+    table.scale(1.0, 2.0)
+
+    # Header style (table row 0 holds the column labels)
+    for j in range(n_cols):
+        header = table[0, j]
+        header.set_facecolor('#37474F')
+        header.set_text_props(color='white', fontweight='bold')
+
+    # Row coloring: tint by medium, lighter alpha for the Excl. Ökologie rows
+    tint_by_medium = {'Chat': '#BBDEFB', 'Video': '#FFE0B2', 'VR': '#C8E6C9'}
+    alpha_by_cond = {'All Topics': 0.85, 'Excl. Ökologie': 0.60}
+    for table_row, (m, cond, *_) in enumerate(rows, start=1):
+        for j in range(n_cols):
+            body_cell = table[table_row, j]
+            body_cell.set_facecolor(tint_by_medium[m])
+            body_cell.set_alpha(alpha_by_cond[cond])
+
+    ax.set_title('F5 – Statistical Summary: All Topics vs. Excl. Ökologie',
+                 fontsize=13, fontweight='bold', pad=30)
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'F5_stats_table.png')
+    plt.close(fig)
+
+
+# =============================================================================
+# G. EFFECT PER TOPIC
+# =============================================================================
+
+def plot_G1_effect_per_topic(paired):
+    """Bar chart: mean score gain + Cohen's d per topic, 95% CI.
+
+    Left panel: mean gain per topic with error bars, annotated with the
+    gain, n and significance stars. Right panel: Cohen's d per topic with
+    reference lines at 0.2 / 0.5 / 0.8. Saves the figure to
+    PLOT_DIR / 'G1_effect_per_topic.png'.
+
+    The error bars are t-based 95% confidence intervals
+    (SEM × t[0.975, n-1]); the normal quantile 1.96 would understate the
+    interval at the small group sizes used here.
+    """
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
+
+    topics = TOPIC_ORDER
+    means, cis, ds, ps, ns = [], [], [], [], []
+    for topic in topics:
+        sub = paired[paired['Topic'] == topic]
+        g, sd, sem, d, tv, p, n = compute_effect(sub)
+        # Student-t critical value for n-1 degrees of freedom; undefined
+        # (NaN, no error bar) when fewer than two pairs are available.
+        t_crit = stats.t.ppf(0.975, n - 1) if n >= 2 else np.nan
+        means.append(g)
+        cis.append(sem * t_crit)
+        ds.append(d)
+        ps.append(p)
+        ns.append(n)
+
+    topic_colors = [TOPIC_COLORS[topic] for topic in topics]
+
+    bars1 = ax1.bar(topics, means, color=topic_colors,
+                    alpha=0.8, yerr=cis, capsize=6, edgecolor='white', lw=1.5)
+    for b, g, p, n in zip(bars1, means, ps, ns):
+        star = sig_stars(p)
+        ax1.text(b.get_x() + b.get_width()/2,
+                 g + (b.get_height() * 0.05 if g >= 0 else -2),
+                 f'{g:+.1f}%\nn={n}\n{star}',
+                 ha='center', va='bottom', fontsize=10, fontweight='bold')
+    ax1.axhline(0, color='gray', lw=1)
+    ax1.set_ylabel('Mean Score Gain (%, 95% CI)', fontsize=12)
+    ax1.set_title('Mean Tutoring Gain per Topic', fontsize=12, fontweight='bold')
+    ax1.set_xticks(range(len(topics)))
+    ax1.set_xticklabels(topics, fontsize=11)
+
+    bars2 = ax2.bar(topics, ds, color=topic_colors,
+                    alpha=0.8, edgecolor='white', lw=1.5)
+    for b, d_val, p in zip(bars2, ds, ps):
+        star = sig_stars(p)
+        ax2.text(b.get_x() + b.get_width()/2,
+                 max(d_val, 0) + 0.03,
+                 f"d={d_val:.2f}\n{star}",
+                 ha='center', va='bottom', fontsize=10, fontweight='bold')
+    # Conventional effect-size thresholds
+    for thresh, label, ls in [(0.2, 'small', ':'), (0.5, 'medium', '--'), (0.8, 'large', '-')]:
+        ax2.axhline(thresh, color='gray', lw=1, ls=ls, alpha=0.5)
+        ax2.text(2.55, thresh + 0.02, label, fontsize=8, color='gray')
+    ax2.set_ylim(bottom=0)
+    ax2.set_ylabel("Cohen's d", fontsize=12)
+    ax2.set_title("Effect Size (Cohen's d) per Topic", fontsize=12, fontweight='bold')
+    ax2.set_xticks(range(len(topics)))
+    ax2.set_xticklabels(topics, fontsize=11)
+
+    fig.suptitle("G1 – Tutoring Effect per Topic", fontsize=14, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'G1_effect_per_topic.png')
+    plt.close(fig)
+
+
+def plot_G2_slopes_per_topic(paired):
+    """Paired slope plots per topic (3 panels), with medium-colored lines.
+
+    Each panel draws one Pre→Post line per row of that topic (colored by
+    medium, labeled with the participant), one thick diamond-marked mean
+    line per medium that has rows (its legend entry shows the mean gain),
+    and, when at least two rows exist, an overall stats box from
+    compute_effect. Saves the figure to PLOT_DIR / 'G2_slopes_per_topic.png'.
+    """
+    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
+
+    for ax, topic in zip(axes, TOPIC_ORDER):
+        rows = paired[paired['Topic'] == topic].sort_values('P_Num')
+
+        # One thin line per participant.
+        for who, medium, pre, post in zip(rows['Participant'], rows['Medium'],
+                                          rows['Pre_Score'], rows['Post_Score']):
+            ax.plot([0, 1], [pre, post],
+                    color=MEDIUM_COLORS[medium], alpha=0.5, lw=1.5,
+                    marker='o', markersize=5)
+            ax.annotate(who, (1.02, post),
+                        fontsize=7, va='center', alpha=0.6)
+
+        # Mean trajectory per medium, skipped for mediums without rows.
+        for medium in MEDIUM_ORDER:
+            of_medium = rows[rows['Medium'] == medium]
+            if of_medium.empty:
+                continue
+            pm = of_medium['Pre_Score'].mean()
+            qm = of_medium['Post_Score'].mean()
+            ax.plot([0, 1], [pm, qm],
+                    color=MEDIUM_COLORS[medium], lw=3.5, marker='D', markersize=10,
+                    zorder=10, markeredgecolor='white', markeredgewidth=2,
+                    label=f'{medium} ({qm-pm:+.1f}%)')
+
+        if len(rows) >= 2:
+            g, sd, sem, d, t, p, n = compute_effect(rows)
+            star = sig_stars(p)
+            ax.text(0.5, 0.03,
+                    f'Overall: {g:+.1f}%  d={d:.2f}\nt={t:.2f}  p={p:.3f} {star}',
+                    transform=ax.transAxes, ha='center', fontsize=9,
+                    bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))
+
+        ax.set_xticks([0, 1])
+        ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=11)
+        ax.set_title(topic, fontsize=14, fontweight='bold', color=TOPIC_COLORS[topic])
+        ax.set_ylim(-5, 110)
+        ax.legend(fontsize=9, loc='upper left')
+
+    axes[0].set_ylabel('Test Score (%)', fontsize=12)
+    fig.suptitle('G2 – Paired Slopes by Topic (Medium-Colored Lines)',
+                 fontsize=14, fontweight='bold')
+    fig.tight_layout(rect=[0, 0, 1, 0.96])
+    fig.savefig(PLOT_DIR / 'G2_slopes_per_topic.png')
+    plt.close(fig)
+
+
+# =============================================================================
+# H. ALL MEDIUM × TOPIC COMBINATIONS (3×3 GRID)
+# =============================================================================
+
+def plot_H1_medium_topic_grid(paired):
+    """Draw a 3×3 grid of slope plots: one row per medium, one column per topic.
+
+    Each cell shows the individual pre/post lines of the matching rows in the
+    topic color and, when the cell has data, a thick line in the medium
+    color. With two or more rows that line joins the cell means and a box
+    reports the values from ``compute_effect``; with exactly one row the line
+    repeats that row and the cell is marked 'n=1 (no stats)'. The figure is
+    saved as ``H1_medium_topic_grid.png`` in ``PLOT_DIR``.
+
+    Args:
+        paired: DataFrame with the columns 'Medium', 'Topic', 'P_Num',
+            'Participant', 'Pre_Score' and 'Post_Score' (plus whatever
+            ``compute_effect`` reads).
+    """
+    fig, axes = plt.subplots(3, 3, figsize=(18, 16), sharey=True)
+    x_ends = [0, 1]
+
+    for row_idx, medium in enumerate(MEDIUM_ORDER):
+        for col_idx, topic in enumerate(TOPIC_ORDER):
+            ax = axes[row_idx][col_idx]
+            in_cell = (paired['Medium'] == medium) & (paired['Topic'] == topic)
+            cell = paired[in_cell].sort_values('P_Num')
+            n_rows = len(cell)
+
+            # Individual trajectories with the participant label at the right end.
+            for _, row in cell.iterrows():
+                post = row['Post_Score']
+                ax.plot(x_ends, [row['Pre_Score'], post],
+                        color=TOPIC_COLORS[topic], alpha=0.55, lw=1.5,
+                        marker='o', markersize=5)
+                ax.annotate(row['Participant'], (1.02, post),
+                            fontsize=7, va='center', alpha=0.6)
+
+            # Highlighted line: cell mean for n >= 2, the single row for n == 1.
+            if n_rows >= 2:
+                ends = [cell['Pre_Score'].mean(), cell['Post_Score'].mean()]
+                width, size = 4, 11
+            elif n_rows == 1:
+                only = cell.iloc[0]
+                ends = [only['Pre_Score'], only['Post_Score']]
+                width, size = 3, 10
+            else:
+                ends = None
+
+            if ends is not None:
+                ax.plot(x_ends, ends,
+                        color=MEDIUM_COLORS[medium], lw=width, marker='D',
+                        markersize=size, zorder=10,
+                        markeredgecolor='white', markeredgewidth=2)
+
+            # Annotation: statistics for n >= 2, a plain note for n == 1.
+            if n_rows >= 2:
+                gain, _sd, _sem, d_val, _t, p_val, n_obs = compute_effect(cell)
+                stars = sig_stars(p_val)
+                ax.text(0.5, 0.03,
+                        f'n={n_obs}  {gain:+.1f}%\nd={d_val:.2f}  p={p_val:.3f} {stars}',
+                        transform=ax.transAxes, ha='center', fontsize=8.5,
+                        bbox=dict(boxstyle='round,pad=0.3', facecolor='lightyellow', alpha=0.9))
+            elif n_rows == 1:
+                ax.text(0.5, 0.03, 'n=1 (no stats)', transform=ax.transAxes,
+                        ha='center', fontsize=8.5, color='gray')
+
+            ax.set_xticks(x_ends)
+            ax.set_xticklabels(['Pre', 'Post'], fontsize=9)
+            ax.set_ylim(-5, 110)
+
+            # Row labels on the first column, column titles on the first row.
+            if col_idx == 0:
+                ax.set_ylabel(f'{medium}\nScore (%)', fontsize=10, fontweight='bold',
+                              color=MEDIUM_COLORS[medium])
+            if row_idx == 0:
+                ax.set_title(topic, fontsize=12, fontweight='bold',
+                             color=TOPIC_COLORS[topic])
+
+    fig.suptitle('H1 – Tutoring Slopes: All Medium × Topic Combinations',
+                 fontsize=15, fontweight='bold')
+    fig.tight_layout(rect=[0, 0, 0.97, 0.97])
+    fig.savefig(PLOT_DIR / 'H1_medium_topic_grid.png')
+    plt.close(fig)
+
+
+# =============================================================================
+# I. OUTLIER INFLUENCE ANALYSIS
+# =============================================================================
+
+def _flag_outliers(paired):
+    """Return a copy of ``paired`` with an added boolean 'Outlier' column.
+
+    The flag is computed separately for every medium in ``MEDIUM_ORDER`` by
+    passing that medium's 'Score_Gain' values to ``iqr_outlier_mask``
+    (defined elsewhere; presumably the 1.5×IQR rule). Rows whose medium is
+    not in ``MEDIUM_ORDER`` keep ``False``. The input frame is left untouched.
+    """
+    flagged = paired.copy()
+    flagged['Outlier'] = False
+    for medium in MEDIUM_ORDER:
+        in_medium = flagged['Medium'] == medium
+        gains = flagged.loc[in_medium, 'Score_Gain']
+        flagged.loc[in_medium, 'Outlier'] = iqr_outlier_mask(gains).values
+    return flagged
+
+
+def plot_I1_outlier_scatter(paired):
+    """Plot a jittered strip of score gains per medium with outliers highlighted.
+
+    Each of the three panels shows one medium. Dashed red lines mark
+    Q1 − 1.5·IQR and Q3 + 1.5·IQR of that medium's 'Score_Gain'. Points
+    flagged by ``_flag_outliers`` are drawn larger, in red, and annotated
+    with participant and the first six characters of the topic. The figure
+    is saved as ``I1_outlier_scatter.png`` in ``PLOT_DIR``.
+
+    Args:
+        paired: DataFrame with the columns 'Medium', 'Score_Gain',
+            'Participant' and 'Topic'.
+    """
+    flagged = _flag_outliers(paired)
+    fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)
+    outlier_red = '#E53935'
+    x_center = 0.5
+
+    for col_idx, medium in enumerate(MEDIUM_ORDER):
+        ax = axes[col_idx]
+        rows = flagged[flagged['Medium'] == medium]
+        gains = rows['Score_Gain']
+
+        # IQR fences, recomputed here only for display.
+        q1 = gains.quantile(0.25)
+        q3 = gains.quantile(0.75)
+        iqr = q3 - q1
+        lo = q1 - 1.5 * iqr
+        hi = q3 + 1.5 * iqr
+
+        ax.axhline(hi, color=outlier_red, lw=1.5, ls='--', alpha=0.7,
+                   label=f'±1.5 IQR ({lo:.1f}–{hi:.1f})')
+        ax.axhline(lo, color=outlier_red, lw=1.5, ls='--', alpha=0.7)
+        ax.axhline(0,  color='gray', lw=1, alpha=0.5)
+
+        # Fixed seed per panel so the horizontal jitter is reproducible.
+        rng = np.random.default_rng(42)
+        for _, row in rows.iterrows():
+            jitter = rng.uniform(-0.12, 0.12)
+            x_pos = x_center + jitter
+            gain = row['Score_Gain']
+            flagged_row = row['Outlier']
+
+            if flagged_row:
+                point_color, marker_size = outlier_red, 10
+            else:
+                point_color, marker_size = MEDIUM_COLORS[medium], 7
+            ax.scatter(x_pos, gain, color=point_color, s=marker_size**2,
+                       alpha=0.8, edgecolors='white', lw=0.5, zorder=5)
+
+            if flagged_row:
+                # Push the label away from the strip, on the side the point sits.
+                x_offset = 20 if jitter > 0 else -20
+                ax.annotate(f"{row['Participant']}\n({row['Topic'][:6]})",
+                            (x_pos, gain),
+                            fontsize=7.5, ha='center',
+                            xytext=(x_offset, 0),
+                            textcoords='offset points',
+                            arrowprops=dict(arrowstyle='->', color=outlier_red, lw=0.8),
+                            color=outlier_red, fontweight='bold')
+
+        n_out = rows['Outlier'].sum()
+        plural = "s" if n_out != 1 else ""
+        ax.set_xlim(0, 1)
+        ax.set_xticks([x_center])
+        ax.set_xticklabels([medium], fontsize=12)
+        ax.set_title(f'{medium}\n({n_out} outlier{plural})',
+                     fontsize=12, fontweight='bold', color=MEDIUM_COLORS[medium])
+        if col_idx == 0:
+            ax.set_ylabel('Score Gain (%)', fontsize=12)
+        ax.legend(fontsize=8, loc='upper right')
+
+    fig.suptitle('I1 – Score Gain Scatter with Outlier Flags (IQR Method)',
+                 fontsize=14, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'I1_outlier_scatter.png')
+    plt.close(fig)
+
+
+def plot_I2_outlier_effect_comparison(paired):
+    """Compare per-medium effect sizes with and without flagged outliers.
+
+    Left panel: grouped bars of the ``d`` value returned by ``compute_effect``
+    for all rows versus rows not flagged by ``_flag_outliers``; a NaN ``d`` is
+    drawn as 0. Right panel: the difference (outliers removed − all) of those
+    plotted values. The figure is saved as ``I2_outlier_effect_comparison.png``
+    in ``PLOT_DIR``.
+
+    Args:
+        paired: DataFrame with the columns 'Medium' and 'Score_Gain' (plus
+            whatever ``compute_effect`` reads).
+    """
+    flagged = _flag_outliers(paired)
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
+
+    x = np.arange(len(MEDIUM_ORDER))
+    w = 0.35
+    bar_colors = [MEDIUM_COLORS[m] for m in MEDIUM_ORDER]
+
+    # One record per condition: effect size, mean gain and p-value per medium.
+    full = {'d': [], 'gain': [], 'p': []}
+    trimmed = {'d': [], 'gain': [], 'p': []}
+    for medium in MEDIUM_ORDER:
+        rows_all = flagged[flagged['Medium'] == medium]
+        rows_kept = rows_all[~rows_all['Outlier']]
+        for store, rows in ((full, rows_all), (trimmed, rows_kept)):
+            gain, _sd, _sem, d_val, _t, p_val, _n = compute_effect(rows)
+            # A NaN effect size is shown as a zero-height bar.
+            store['d'].append(0 if np.isnan(d_val) else d_val)
+            store['gain'].append(gain)
+            store['p'].append(p_val)
+
+    conditions = [
+        ('All Data',         full,    0.80, ''),
+        ('Outliers Removed', trimmed, 0.45, '//'),
+    ]
+    for j, (label, store, alpha, hatch) in enumerate(conditions):
+        bars = ax1.bar(x + j*w - w/2, store['d'], w, label=label,
+                       color=bar_colors,
+                       alpha=alpha, hatch=hatch, edgecolor='white', lw=1.2)
+        for bar, d_val, gain, p_val in zip(bars, store['d'], store['gain'], store['p']):
+            stars = '' if np.isnan(p_val) else sig_stars(p_val)
+            ax1.text(bar.get_x() + bar.get_width()/2,
+                     max(d_val, 0) + 0.03,
+                     f'd={d_val:.2f}\n{gain:+.1f}%\n{stars}',
+                     ha='center', va='bottom', fontsize=8.5, fontweight='bold')
+
+    # Reference lines at 0.2 / 0.5 / 0.8, labeled small / medium / large.
+    for thresh, lbl, ls in [(0.2, 'small', ':'), (0.5, 'medium', '--'), (0.8, 'large', '-')]:
+        ax1.axhline(thresh, color='gray', lw=1, ls=ls, alpha=0.5)
+        ax1.text(2.65, thresh + 0.02, lbl, fontsize=8, color='gray')
+    ax1.set_xticks(x)
+    ax1.set_xticklabels(MEDIUM_ORDER, fontsize=12)
+    ax1.set_ylim(bottom=0)
+    ax1.set_ylabel("Cohen's d", fontsize=12)
+    ax1.set_title("Cohen's d: All Data vs. Outliers Removed", fontsize=12, fontweight='bold')
+    ax1.legend(fontsize=10)
+
+    # Right panel: change in the plotted effect size, green if not negative.
+    delta_d = [kept - everything for everything, kept in zip(full['d'], trimmed['d'])]
+    delta_colors = ['#43A047' if delta >= 0 else '#E53935' for delta in delta_d]
+    delta_bars = ax2.bar(MEDIUM_ORDER, delta_d, color=delta_colors,
+                         alpha=0.8, edgecolor='white', lw=1.5)
+    for bar, delta in zip(delta_bars, delta_d):
+        if delta >= 0:
+            y_text, v_align = delta + 0.01, 'bottom'
+        else:
+            y_text, v_align = delta + -0.03, 'top'
+        ax2.text(bar.get_x() + bar.get_width()/2,
+                 y_text,
+                 f'Δd={delta:+.3f}',
+                 ha='center', va=v_align,
+                 fontsize=10, fontweight='bold')
+    ax2.axhline(0, color='gray', lw=1)
+    ax2.set_ylabel("Δ Cohen's d (Outliers Removed − All)", fontsize=12)
+    ax2.set_title('Change in Effect Size After Removing Outliers', fontsize=12, fontweight='bold')
+
+    fig.suptitle('I2 – Outlier Influence on Effect Sizes', fontsize=14, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'I2_outlier_effect_comparison.png')
+    plt.close(fig)
+
+
+def plot_I3_outlier_heatmap(paired):
+    """Draw one participant × topic heatmap of score gains per medium.
+
+    Each of the three panels covers one medium in ``MEDIUM_ORDER``. Cells hold
+    the 'Score_Gain' of the matching row and stay blank where a participant
+    has no row for that topic in that medium. Cells flagged by
+    ``_flag_outliers`` get a red border. The figure is saved as
+    ``I3_outlier_heatmap.png`` in ``PLOT_DIR``.
+
+    Args:
+        paired: DataFrame with the columns 'Medium', 'Participant', 'Topic'
+            and 'Score_Gain'. Participant ids are sorted by the integer that
+            follows their first character (assumes ids like 'P12').
+    """
+    paired_f = _flag_outliers(paired)
+
+    fig, axes = plt.subplots(1, 3, figsize=(18, 8))
+
+    for col_idx, medium in enumerate(MEDIUM_ORDER):
+        ax = axes[col_idx]
+        sub = paired_f[paired_f['Medium'] == medium]
+
+        # Build pivot: rows = participants sorted numerically, cols = topics.
+        pivot = sub.pivot_table(index='Participant', columns='Topic',
+                                values='Score_Gain', aggfunc='first')
+        pivot = pivot.reindex(columns=TOPIC_ORDER)
+        pivot = pivot.reindex(sorted(pivot.index, key=lambda x: int(x[1:])))
+
+        # Same layout for the outlier flags. Reindexing to the gain pivot keeps
+        # both tables positionally aligned; combinations without a row are NaN.
+        outlier_pivot = sub.pivot_table(index='Participant', columns='Topic',
+                                        values='Outlier', aggfunc='first')
+        outlier_pivot = outlier_pivot.reindex(index=pivot.index,
+                                              columns=TOPIC_ORDER)
+
+        # Draw heatmap of score gain
+        sns.heatmap(pivot.astype(float), annot=True, fmt='.1f',
+                    cmap='RdYlGn', center=0, vmin=-40, vmax=60,
+                    linewidths=0.8, ax=ax, cbar_kws={'label': 'Score Gain %'},
+                    annot_kws={'size': 9})
+
+        # Overlay red border for outliers. NaN is truthy in Python, so a
+        # missing cell must be excluded explicitly or it would be outlined too.
+        for r_i in range(len(pivot.index)):
+            for c_i in range(len(TOPIC_ORDER)):
+                flag = outlier_pivot.iat[r_i, c_i]
+                if pd.notna(flag) and bool(flag):
+                    ax.add_patch(mpatches.Rectangle(
+                        (c_i, r_i), 1, 1,
+                        fill=False, edgecolor='#E53935', lw=3, zorder=5))
+
+        ax.set_title(f'{medium}', fontsize=13, fontweight='bold',
+                     color=MEDIUM_COLORS[medium])
+        ax.set_xlabel('Topic', fontsize=10)
+        ax.set_ylabel('Participant' if col_idx == 0 else '', fontsize=10)
+
+    fig.suptitle('I3 – Outlier Heatmap: Score Gain by Participant × Topic\n'
+                 '(Red border = IQR outlier within that medium)',
+                 fontsize=13, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'I3_outlier_heatmap.png')
+    plt.close(fig)
+
+
+# =============================================================================
+# STATS EXPORT
+# =============================================================================
+
+def export_stats(paired):
+    """Write the effect tables behind sections F–I as CSV files into STATS_DIR.
+
+    Four files are produced from the values returned by ``compute_effect``:
+    effects per medium with and without the topics outside ``TOPICS_NO_OEK``,
+    effects per topic, effects per medium × topic cell, and the per-medium
+    comparison of all rows against rows not flagged by ``_flag_outliers``.
+
+    Args:
+        paired: DataFrame with the columns 'Medium', 'Topic', 'Participant'
+            and 'Score_Gain' (plus whatever ``compute_effect`` reads).
+    """
+    def _rounded(value, ndigits=3):
+        # Round a statistic, passing NaN through unchanged.
+        return np.nan if np.isnan(value) else round(value, ndigits)
+
+    def _guarded_stats(frame):
+        # Columns shared by the F and H tables, every statistic NaN-guarded.
+        gain, sd, _sem, d_val, t_val, p_val, n_obs = compute_effect(frame)
+        return {
+            'N': n_obs,
+            'Mean_Gain': _rounded(gain),
+            'SD_Gain': _rounded(sd),
+            'Cohens_d': _rounded(d_val),
+            't_stat': _rounded(t_val),
+            'p_value': _rounded(p_val, 4),
+        }
+
+    flagged = _flag_outliers(paired)
+
+    # --- F: effects by medium with/without Ökologie ---
+    rows_f = []
+    for medium in MEDIUM_ORDER:
+        medium_rows = paired[paired['Medium'] == medium]
+        variants = (
+            ('All Topics', medium_rows),
+            ('Excl_Oekologie', medium_rows[medium_rows['Topic'].isin(TOPICS_NO_OEK)]),
+        )
+        for condition, frame in variants:
+            rows_f.append({'Medium': medium, 'Condition': condition,
+                           **_guarded_stats(frame)})
+    table_f = pd.DataFrame(rows_f)
+    table_f.to_csv(STATS_DIR / 'effects_by_medium_with_without_oekologie.csv',
+                   index=False)
+
+    # --- G: effects by topic ---
+    rows_g = []
+    for topic in TOPIC_ORDER:
+        gain, sd, _sem, d_val, t_val, p_val, n_obs = compute_effect(
+            paired[paired['Topic'] == topic])
+        rows_g.append({
+            'Topic': topic,
+            'N': n_obs,
+            'Mean_Gain': round(gain, 3),
+            'SD_Gain': round(sd, 3),
+            'Cohens_d': _rounded(d_val),
+            't_stat': _rounded(t_val),
+            'p_value': _rounded(p_val, 4),
+        })
+    table_g = pd.DataFrame(rows_g)
+    table_g.to_csv(STATS_DIR / 'effects_by_topic.csv', index=False)
+
+    # --- H: effects by medium × topic ---
+    rows_h = []
+    for medium in MEDIUM_ORDER:
+        for topic in TOPIC_ORDER:
+            in_cell = (paired['Medium'] == medium) & (paired['Topic'] == topic)
+            rows_h.append({'Medium': medium, 'Topic': topic,
+                           **_guarded_stats(paired[in_cell])})
+    table_h = pd.DataFrame(rows_h)
+    table_h.to_csv(STATS_DIR / 'effects_by_medium_topic_grid.csv', index=False)
+
+    # --- I: outlier influence ---
+    rows_i = []
+    for medium in MEDIUM_ORDER:
+        rows_all = flagged[flagged['Medium'] == medium]
+        rows_kept = rows_all[~rows_all['Outlier']]
+        g_all, _sd1, _, d_all, _t1, p_all, n_all = compute_effect(rows_all)
+        g_kept, _sd2, _, d_kept, _t2, p_kept, n_kept = compute_effect(rows_kept)
+
+        # Human-readable list of the removed rows, e.g. "P3/Topic(+12.5%)".
+        removed = rows_all[rows_all['Outlier']]
+        out_list = '; '.join(
+            f"{pid}/{topic}({gain:+.1f}%)"
+            for pid, topic, gain in zip(removed['Participant'],
+                                        removed['Topic'],
+                                        removed['Score_Gain']))
+
+        # The difference is only defined when both effect sizes are.
+        if np.isnan(d_all) or np.isnan(d_kept):
+            delta = np.nan
+        else:
+            delta = d_kept - d_all
+
+        rows_i.append({
+            'Medium': medium,
+            'N_all': n_all,
+            'Mean_Gain_all': round(g_all, 3),
+            'Cohens_d_all': _rounded(d_all),
+            'p_all': _rounded(p_all, 4),
+            'N_no_outliers': n_kept,
+            'Mean_Gain_no_outliers': round(g_kept, 3),
+            'Cohens_d_no_outliers': _rounded(d_kept),
+            'p_no_outliers': _rounded(p_kept, 4),
+            'Delta_d': round(delta, 3),
+            'Outliers': out_list,
+        })
+    table_i = pd.DataFrame(rows_i)
+    table_i.to_csv(STATS_DIR / 'outlier_influence.csv', index=False)
+
+    print(f"  Stats exported to: {STATS_DIR}")
+
+
+# =============================================================================
+# MAIN
+# =============================================================================
+
+def main():
+    """Load the data, render every plot of sections F–I and export the stats.
+
+    Progress is printed per section and per plot. The summary line reports
+    the number of plots actually rendered, so it stays correct when entries
+    are added to or removed from ``sections``.
+    """
+    print("Loading data...")
+    df     = load_data()
+    paired = build_paired_tutoring(df)
+    print(f"  {len(paired)} paired tutoring entries across "
+          f"{paired['Participant'].nunique()} participants\n")
+
+    # (section title, [(plot code, description, plot function), ...]);
+    # every plot function is called with the paired tutoring frame.
+    sections = [
+        ("F. Effect Without Ökologie (vs. With)", [
+            ("F1", "Cohen's d comparison by medium",          plot_F1_cohens_d_comparison),
+            ("F2", "Mean score gain comparison",              plot_F2_mean_gain_comparison),
+            ("F3", "Paired slopes 2×3 grid",                  plot_F3_paired_slopes_comparison),
+            ("F4", "Gain distribution comparison",            plot_F4_gain_distribution_comparison),
+            ("F5", "Descriptive stats table",                 plot_F5_stats_table),
+        ]),
+        ("G. Effect Per Topic", [
+            ("G1", "Bar chart: gain + d per topic",           plot_G1_effect_per_topic),
+            ("G2", "Paired slopes per topic",                 plot_G2_slopes_per_topic),
+        ]),
+        ("H. Medium × Topic Grid", [
+            ("H1", "3×3 grid of slope plots",                 plot_H1_medium_topic_grid),
+        ]),
+        ("I. Outlier Influence Analysis", [
+            ("I1", "Outlier scatter per medium",              plot_I1_outlier_scatter),
+            ("I2", "Effect size: all vs. outliers removed",   plot_I2_outlier_effect_comparison),
+            ("I3", "Outlier heatmap (participant × topic)",   plot_I3_outlier_heatmap),
+        ]),
+    ]
+
+    n_plots = 0
+    for section_name, plots in sections:
+        print(section_name)
+        for code, desc, plot_fn in plots:
+            plot_fn(paired)
+            n_plots += 1
+            print(f"  [{code}] {desc}")
+
+    # Count what was rendered instead of hard-coding the total.
+    print(f"\n{n_plots} plots saved to: {PLOT_DIR}")
+
+    print("\nExporting statistics...")
+    export_stats(paired)
+    print("Done.")
+
+
+if __name__ == "__main__":
+    main()
--- a/generate_plots_questionnaires.py
+++ b/generate_plots_questionnaires.py
@@ -29,7 +29,7 @@ from scipy import stats
 # PATHS & CONSTANTS
 # =============================================================================

-BASE = Path(r"F:\GitHub Projekte\VirTu-Eval\Data")
+BASE = Path(__file__).resolve().parent / "Data"
 PLOT_DIR = BASE / "plots_questionnaires"
 PLOT_DIR.mkdir(exist_ok=True)
 STATS_DIR = BASE / "stats"