added comparison without ökologie

2026-02-28 16:12:09 +01:00
parent 9e50db2da6
commit b1039e6a7f
48 changed files with 977 additions and 2 deletions
--- a/generate_plots_effects.py
+++ b/generate_plots_effects.py
@ -0,0 +1,855 @@
+"""
+generate_plots_effects.py
+
+Effect-focused analysis for VirTu-Eval experiment data.
+Generates plots into Data/plots_effects/ organized by section:
+
+  F. Effect Without Ökologie (vs. With)  – 5 plots
+  G. Effect Per Topic                    – 2 plots
+  H. All Medium × Topic Combinations     – 1 plot (3×3 grid)
+  I. Outlier Influence Analysis          – 3 plots
+
+Usage:
+  python generate_plots_effects.py
+"""
+
+import pandas as pd
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib.lines import Line2D
+import seaborn as sns
+from pathlib import Path
+from scipy import stats
+
+# =============================================================================
+# CONFIG
+# =============================================================================
+# Root folder for inputs and outputs.  The path is relative, so it resolves
+# against the current working directory of the process.
+BASE = Path("Data")
+# Destination folder for every figure saved by the plot_* functions.
+PLOT_DIR = BASE / "plots_effects"
+# NOTE: the output folders are created as a side effect of importing/running
+# this module.
+PLOT_DIR.mkdir(parents=True, exist_ok=True)
+# Destination folder for the CSV files written by export_stats().
+STATS_DIR = BASE / "stats"
+# parents=True is not passed here; BASE already exists after the mkdir above.
+STATS_DIR.mkdir(exist_ok=True)
+
+# Measurement phases in order; used as the ordered categories of the
+# 'Phase' column built in load_data().
+PHASE_ORDER  = ['Pre-Reading', 'Post-Reading', 'Pre-Tutoring', 'Post-Tutoring']
+# Short display labels, position-aligned with PHASE_ORDER.
+PHASE_LABELS = ['Pre-Read', 'Post-Read', 'Pre-Tutor', 'Post-Tutor']
+# Lookup: full phase name -> short display label.
+PHASE_SHORT  = dict(zip(PHASE_ORDER, PHASE_LABELS))
+
+# Media in the fixed order used for bar positions and subplot columns/rows,
+# and the hex colour assigned to each of them.
+MEDIUM_ORDER  = ['Chat', 'Video', 'VR']
+MEDIUM_COLORS = {'Chat': '#2196F3', 'Video': '#FF9800', 'VR': '#4CAF50'}
+
+# Topics in the fixed order used for subplot columns and heatmap columns,
+# and the hex colour assigned to each of them.
+TOPIC_ORDER  = ['Mendel', 'DNA-Replikation', 'Ökologie']
+TOPIC_COLORS = {'Mendel': '#E91E63', 'DNA-Replikation': '#9C27B0', 'Ökologie': '#009688'}
+
+# Topic subset used for every "Excl. Ökologie" condition.
+TOPICS_NO_OEK = ['Mendel', 'DNA-Replikation']
+
+# Global plot styling: seaborn whitegrid theme, 150 dpi figures, and
+# tight bounding boxes whenever a figure is saved.
+sns.set_theme(style="whitegrid", font_scale=1.05)
+plt.rcParams['figure.dpi'] = 150
+plt.rcParams['savefig.bbox'] = 'tight'
+
+
+# =============================================================================
+# HELPERS
+# =============================================================================
+
+def cohens_d(pre, post):
+    diff = post - pre
+    return diff.mean() / diff.std(ddof=1) if diff.std(ddof=1) > 0 else 0.0
+
+
+def sig_stars(p):
+    if p < 0.001:
+        return '***'
+    elif p < 0.01:
+        return '**'
+    elif p < 0.05:
+        return '*'
+    return 'n.s.'
+
+
+def compute_effect(sub):
+    """Return (mean_gain, sd_gain, sem_gain, d, t, p, n) for a paired subset."""
+    pre  = sub['Pre_Score']
+    post = sub['Post_Score']
+    n = len(sub)
+    if n < 2:
+        return sub['Score_Gain'].mean(), sub['Score_Gain'].std(), np.nan, np.nan, np.nan, np.nan, n
+    t, p = stats.ttest_rel(pre, post)
+    d    = cohens_d(pre, post)
+    g    = sub['Score_Gain']
+    return g.mean(), g.std(ddof=1), g.sem(), d, t, p, n
+
+
+def iqr_outlier_mask(series):
+    """Return boolean Series: True where value is an IQR outlier (1.5×IQR rule)."""
+    q1, q3 = series.quantile(0.25), series.quantile(0.75)
+    iqr = q3 - q1
+    return (series < q1 - 1.5 * iqr) | (series > q3 + 1.5 * iqr)
+
+
+# =============================================================================
+# DATA LOADING
+# =============================================================================
+
+def load_data():
+    df = pd.read_csv(BASE / "test_scores_all.csv", encoding="utf-8-sig")
+    df['Zeitpunkt'] = df['Zeitpunkt'].str.strip().replace('Pre-Tutor', 'Pre-Tutoring')
+    df['Phase'] = pd.Categorical(df['Zeitpunkt'], categories=PHASE_ORDER, ordered=True)
+    df['P_Num'] = df['Participant'].str.extract(r'(\d+)').astype(int)
+    return df
+
+
+def build_paired_tutoring(df):
+    pre = df[df['Zeitpunkt'] == 'Pre-Tutoring'][
+        ['Participant', 'Topic', 'Medium', 'Score_Pct', 'Avg_Confidence']].copy()
+    post = df[df['Zeitpunkt'] == 'Post-Tutoring'][
+        ['Participant', 'Topic', 'Medium', 'Score_Pct', 'Avg_Confidence']].copy()
+    pre.columns  = ['Participant', 'Topic', 'Medium', 'Pre_Score',  'Pre_Conf']
+    post.columns = ['Participant', 'Topic', 'Medium', 'Post_Score', 'Post_Conf']
+    paired = pre.merge(post, on=['Participant', 'Topic', 'Medium'])
+    paired['Score_Gain'] = paired['Post_Score'] - paired['Pre_Score']
+    paired['Conf_Gain']  = paired['Post_Conf']  - paired['Pre_Conf']
+    paired['P_Num'] = paired['Participant'].str.extract(r'(\d+)').astype(int)
+    return paired
+
+
+# =============================================================================
+# F. EFFECT WITHOUT ÖKOLOGIE (vs. WITH)
+# =============================================================================
+
+def plot_F1_cohens_d_comparison(paired):
+    """Bar chart: Cohen's d per medium – All Topics vs. Excl. Ökologie."""
+    fig, ax = plt.subplots(figsize=(10, 6))
+    x = np.arange(len(MEDIUM_ORDER))
+    w = 0.35
+
+    for j, (label, use_all, hatch) in enumerate([
+        ('All Topics',     True,  ''),
+        ('Excl. Ökologie', False, '//'),
+    ]):
+        ds, gs, ps, ns = [], [], [], []
+        for m in MEDIUM_ORDER:
+            sub = paired[paired['Medium'] == m]
+            sub_f = sub if use_all else sub[sub['Topic'].isin(TOPICS_NO_OEK)]
+            g, sd, sem, d, t, p, n = compute_effect(sub_f)
+            ds.append(d if not np.isnan(d) else 0)
+            gs.append(g)
+            ps.append(p if not np.isnan(p) else 1)
+            ns.append(n)
+
+        bars = ax.bar(x + j*w - w/2, ds, w, label=label,
+                      color=[MEDIUM_COLORS[m] for m in MEDIUM_ORDER],
+                      alpha=0.85 if j == 0 else 0.45,
+                      hatch=hatch, edgecolor='white', linewidth=1.2)
+
+        for i, (b, g, p, d_val) in enumerate(zip(bars, gs, ps, ds)):
+            star = sig_stars(p)
+            ax.text(b.get_x() + b.get_width()/2,
+                    max(d_val, 0) + 0.04,
+                    f'd={d_val:.2f}\n{g:+.1f}%\n{star}',
+                    ha='center', va='bottom', fontsize=8.5, fontweight='bold',
+                    color='#333333')
+
+    ax.axhline(0.2, color='gray', lw=1, ls=':', alpha=0.6)
+    ax.axhline(0.5, color='gray', lw=1, ls='--', alpha=0.6)
+    ax.axhline(0.8, color='gray', lw=1, ls='-', alpha=0.4)
+    ax.text(2.65, 0.21, 'small',  fontsize=8, color='gray', va='bottom')
+    ax.text(2.65, 0.51, 'medium', fontsize=8, color='gray', va='bottom')
+    ax.text(2.65, 0.81, 'large',  fontsize=8, color='gray', va='bottom')
+
+    ax.set_xticks(x)
+    ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
+    ax.set_ylabel("Cohen's d (tutoring score gain)", fontsize=12)
+    ax.set_ylim(bottom=0)
+    ax.legend(fontsize=11)
+    ax.set_title("F1 – Effect Sizes by Medium: All Topics vs. Excl. Ökologie",
+                 fontsize=13, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'F1_cohens_d_comparison.png')
+    plt.close(fig)
+
+
+def plot_F2_mean_gain_comparison(paired):
+    """Bar chart with 95% CI: mean score gain per medium – All vs. Excl. Ökologie."""
+    fig, ax = plt.subplots(figsize=(10, 6))
+    x = np.arange(len(MEDIUM_ORDER))
+    w = 0.35
+
+    for j, (label, filter_fn, alpha, hatch) in enumerate([
+        ('All Topics',     lambda sub: sub,                                   0.80, ''),
+        ('Excl. Ökologie', lambda sub: sub[sub['Topic'].isin(TOPICS_NO_OEK)], 0.45, '//'),
+    ]):
+        means, cis = [], []
+        for m in MEDIUM_ORDER:
+            sub_f = filter_fn(paired[paired['Medium'] == m])
+            g, sd, sem, d, t, p, n = compute_effect(sub_f)
+            means.append(g)
+            cis.append(sem * 1.96)
+
+        bars = ax.bar(x + j*w - w/2, means, w, label=label,
+                      color=[MEDIUM_COLORS[m] for m in MEDIUM_ORDER],
+                      alpha=alpha, hatch=hatch, edgecolor='white', linewidth=1.2,
+                      yerr=cis, capsize=5, error_kw=dict(lw=1.5, capthick=1.5))
+
+        for b, g in zip(bars, means):
+            ax.text(b.get_x() + b.get_width()/2,
+                    g + (b.get_height() * 0.05 if g >= 0 else -2),
+                    f'{g:+.1f}%',
+                    ha='center', va='bottom', fontsize=9, fontweight='bold',
+                    color='#333333')
+
+    ax.axhline(0, color='gray', lw=1)
+    ax.set_xticks(x)
+    ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
+    ax.set_ylabel('Mean Score Gain (%, 95% CI)', fontsize=12)
+    ax.legend(fontsize=11)
+    ax.set_title('F2 – Mean Score Gain by Medium: All Topics vs. Excl. Ökologie',
+                 fontsize=13, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'F2_mean_gain_comparison.png')
+    plt.close(fig)
+
+
+def plot_F3_paired_slopes_comparison(paired):
+    """2×3 grid: top row = All Topics, bottom row = Excl. Ökologie."""
+    conditions = [
+        ('All Topics',     paired,                                   0),
+        ('Excl. Ökologie', paired[paired['Topic'].isin(TOPICS_NO_OEK)], 1),
+    ]
+
+    fig, axes = plt.subplots(2, 3, figsize=(18, 12), sharey=True)
+
+    for row_idx, (cond_label, data, row) in enumerate(conditions):
+        for col_idx, medium in enumerate(MEDIUM_ORDER):
+            ax = axes[row][col_idx]
+            sub = data[data['Medium'] == medium].sort_values('P_Num')
+
+            for _, r in sub.iterrows():
+                ax.plot([0, 1], [r['Pre_Score'], r['Post_Score']],
+                        color=TOPIC_COLORS[r['Topic']], alpha=0.55, lw=1.5,
+                        marker='o', markersize=5)
+                ax.annotate(r['Participant'], (1.02, r['Post_Score']),
+                            fontsize=7, va='center', alpha=0.6)
+
+            if len(sub) >= 2:
+                pre_m, post_m = sub['Pre_Score'].mean(), sub['Post_Score'].mean()
+                ax.plot([0, 1], [pre_m, post_m],
+                        color=MEDIUM_COLORS[medium], lw=4, marker='D',
+                        markersize=12, zorder=10,
+                        markeredgecolor='white', markeredgewidth=2)
+
+                g, sd, sem, d, t, p, n = compute_effect(sub)
+                star = sig_stars(p)
+                ax.text(0.5, 0.03,
+                        f'n={n}  Gain: {g:+.1f}%\nd={d:.2f}  t={t:.2f}  p={p:.3f} {star}',
+                        transform=ax.transAxes, ha='center', fontsize=9,
+                        bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))
+
+            ax.set_xticks([0, 1])
+            ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=10)
+            ax.set_ylim(-5, 110)
+
+            title_color = MEDIUM_COLORS[medium]
+            if col_idx == 0:
+                ax.set_ylabel(f'{cond_label}\nTest Score (%)', fontsize=10, fontweight='bold')
+            if row == 0:
+                ax.set_title(medium, fontsize=13, fontweight='bold', color=title_color)
+
+    legend_els = [Line2D([0],[0], color=TOPIC_COLORS[t], lw=2, marker='o', ms=6, label=t)
+                  for t in TOPIC_ORDER]
+    legend_els.append(Line2D([0],[0], color='gray', lw=4, marker='D', ms=8, label='Medium Mean'))
+    fig.legend(handles=legend_els, loc='lower center', ncol=4, fontsize=10,
+               bbox_to_anchor=(0.5, 0.01))
+    fig.suptitle('F3 – Paired Slopes: All Topics (top) vs. Excl. Ökologie (bottom)',
+                 fontsize=14, fontweight='bold')
+    fig.tight_layout(rect=[0, 0.05, 1, 0.97])
+    fig.savefig(PLOT_DIR / 'F3_paired_slopes_comparison.png')
+    plt.close(fig)
+
+
+def plot_F4_gain_distribution_comparison(paired):
+    """Side-by-side violin+box plots per medium: All Topics vs. Excl. Ökologie."""
+    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
+
+    for col_idx, medium in enumerate(MEDIUM_ORDER):
+        ax = axes[col_idx]
+        data_all = paired[paired['Medium'] == medium]['Score_Gain'].values
+        data_noe = paired[(paired['Medium'] == medium) &
+                          (paired['Topic'].isin(TOPICS_NO_OEK))]['Score_Gain'].values
+
+        positions = [0.8, 2.2]
+        colors    = [MEDIUM_COLORS[medium], MEDIUM_COLORS[medium]]
+        alphas    = [0.75, 0.40]
+        labels_vp = ['All Topics', 'Excl. Ökologie']
+
+        for pos, data, alpha, lbl in zip(positions,
+                                          [data_all, data_noe],
+                                          alphas, labels_vp):
+            if len(data) < 2:
+                continue
+            parts = ax.violinplot(data, positions=[pos], widths=0.9,
+                                  showmedians=False, showextrema=False)
+            for pc in parts['bodies']:
+                pc.set_facecolor(MEDIUM_COLORS[medium])
+                pc.set_alpha(alpha)
+
+            bp = ax.boxplot(data, positions=[pos], widths=0.35,
+                            patch_artist=True, showmeans=True, notch=False,
+                            meanprops=dict(marker='D', markerfacecolor='black',
+                                          markeredgecolor='white', markersize=7),
+                            medianprops=dict(color='white', lw=2),
+                            boxprops=dict(facecolor=MEDIUM_COLORS[medium], alpha=alpha))
+
+            g, sd, sem, d, t, p, n = compute_effect(
+                paired[paired['Medium'] == medium] if lbl == 'All Topics'
+                else paired[(paired['Medium'] == medium) & paired['Topic'].isin(TOPICS_NO_OEK)]
+            )
+            star = sig_stars(p)
+            ax.text(pos, np.nanmax(data) + 4,
+                    f'n={n}\nM={g:+.1f}%\nd={d:.2f} {star}',
+                    ha='center', va='bottom', fontsize=8.5, fontweight='bold')
+
+        ax.axhline(0, color='gray', lw=1, ls='--', alpha=0.6)
+        ax.set_xticks(positions)
+        ax.set_xticklabels(['All\nTopics', 'Excl.\nÖkologie'], fontsize=10)
+        ax.set_title(medium, fontsize=13, fontweight='bold', color=MEDIUM_COLORS[medium])
+        if col_idx == 0:
+            ax.set_ylabel('Score Gain (%)', fontsize=12)
+
+    fig.suptitle('F4 – Gain Distributions: All Topics vs. Excl. Ökologie',
+                 fontsize=14, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'F4_gain_distribution_comparison.png')
+    plt.close(fig)
+
+
+def plot_F5_stats_table(paired):
+    """Rendered table: N, mean gain, SD, d, t, p for each medium × condition."""
+    fig, ax = plt.subplots(figsize=(14, 5))
+    ax.axis('off')
+
+    rows = []
+    for m in MEDIUM_ORDER:
+        for cond_label, filter_fn in [
+            ('All Topics',     lambda sub, _m=m: paired[paired['Medium'] == _m]),
+            ('Excl. Ökologie', lambda sub, _m=m: paired[(paired['Medium'] == _m) &
+                                                         paired['Topic'].isin(TOPICS_NO_OEK)]),
+        ]:
+            sub = filter_fn(None)
+            g, sd, sem, d, t, p, n = compute_effect(sub)
+            star = sig_stars(p) if not np.isnan(p) else ''
+            rows.append([
+                m, cond_label, str(n),
+                f'{g:+.2f}', f'{sd:.2f}',
+                f'{d:.3f}' if not np.isnan(d) else '–',
+                f'{t:.3f}' if not np.isnan(t) else '–',
+                f'{p:.3f}{star}' if not np.isnan(p) else '–',
+            ])
+
+    col_labels = ['Medium', 'Condition', 'N', 'Mean Gain (%)', 'SD',
+                  "Cohen's d", 't-stat', 'p-value']
+    table = ax.table(cellText=rows, colLabels=col_labels,
+                     loc='center', cellLoc='center')
+    table.auto_set_font_size(False)
+    table.set_fontsize(11)
+    table.scale(1.0, 2.0)
+
+    # Header style
+    for j in range(len(col_labels)):
+        table[0, j].set_facecolor('#37474F')
+        table[0, j].set_text_props(color='white', fontweight='bold')
+
+    # Row coloring
+    medium_col_idx = {'Chat': '#BBDEFB', 'Video': '#FFE0B2', 'VR': '#C8E6C9'}
+    cond_row = {'All Topics': 0.85, 'Excl. Ökologie': 0.60}
+    for i, (m, cond, *_) in enumerate(rows):
+        base_color = medium_col_idx[m]
+        alpha_mod  = cond_row[cond]
+        for j in range(len(col_labels)):
+            cell = table[i + 1, j]
+            cell.set_facecolor(base_color)
+            cell.set_alpha(alpha_mod)
+
+    ax.set_title('F5 – Statistical Summary: All Topics vs. Excl. Ökologie',
+                 fontsize=13, fontweight='bold', pad=30)
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'F5_stats_table.png')
+    plt.close(fig)
+
+
+# =============================================================================
+# G. EFFECT PER TOPIC
+# =============================================================================
+
+def plot_G1_effect_per_topic(paired):
+    """Bar chart: mean score gain + Cohen's d per topic, 95% CI."""
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
+
+    topics = TOPIC_ORDER
+    means, cis, ds, ps, ns = [], [], [], [], []
+    for t in topics:
+        sub = paired[paired['Topic'] == t]
+        g, sd, sem, d, tv, p, n = compute_effect(sub)
+        means.append(g); cis.append(sem * 1.96)
+        ds.append(d); ps.append(p); ns.append(n)
+
+    bars1 = ax1.bar(topics, means, color=[TOPIC_COLORS[t] for t in topics],
+                    alpha=0.8, yerr=cis, capsize=6, edgecolor='white', lw=1.5)
+    for b, g, p, n in zip(bars1, means, ps, ns):
+        star = sig_stars(p)
+        ax1.text(b.get_x() + b.get_width()/2,
+                 g + (b.get_height() * 0.05 if g >= 0 else -2),
+                 f'{g:+.1f}%\nn={n}\n{star}',
+                 ha='center', va='bottom', fontsize=10, fontweight='bold')
+    ax1.axhline(0, color='gray', lw=1)
+    ax1.set_ylabel('Mean Score Gain (%, 95% CI)', fontsize=12)
+    ax1.set_title('Mean Tutoring Gain per Topic', fontsize=12, fontweight='bold')
+    ax1.set_xticks(range(len(topics)))
+    ax1.set_xticklabels(topics, fontsize=11)
+
+    bars2 = ax2.bar(topics, ds, color=[TOPIC_COLORS[t] for t in topics],
+                    alpha=0.8, edgecolor='white', lw=1.5)
+    for b, d_val, p in zip(bars2, ds, ps):
+        star = sig_stars(p)
+        ax2.text(b.get_x() + b.get_width()/2,
+                 max(d_val, 0) + 0.03,
+                 f"d={d_val:.2f}\n{star}",
+                 ha='center', va='bottom', fontsize=10, fontweight='bold')
+    for thresh, label, ls in [(0.2, 'small', ':'), (0.5, 'medium', '--'), (0.8, 'large', '-')]:
+        ax2.axhline(thresh, color='gray', lw=1, ls=ls, alpha=0.5)
+        ax2.text(2.55, thresh + 0.02, label, fontsize=8, color='gray')
+    ax2.set_ylim(bottom=0)
+    ax2.set_ylabel("Cohen's d", fontsize=12)
+    ax2.set_title("Effect Size (Cohen's d) per Topic", fontsize=12, fontweight='bold')
+    ax2.set_xticks(range(len(topics)))
+    ax2.set_xticklabels(topics, fontsize=11)
+
+    fig.suptitle("G1 – Tutoring Effect per Topic", fontsize=14, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'G1_effect_per_topic.png')
+    plt.close(fig)
+
+
+def plot_G2_slopes_per_topic(paired):
+    """Draw pre/post slope plots, one panel per topic in TOPIC_ORDER.
+
+    Thin lines are individual pairs coloured by medium; thick lines are the
+    per-medium means within the topic.  Panels with at least two pairs get
+    an overall stats box.  Saves ``G2_slopes_per_topic.png`` in PLOT_DIR.
+    """
+    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
+
+    for col_idx, topic in enumerate(TOPIC_ORDER):
+        ax = axes[col_idx]
+        sub = paired[paired['Topic'] == topic].sort_values('P_Num')
+
+        # One thin line per pair, labelled with the participant id at the
+        # post-tutoring end.
+        for _, r in sub.iterrows():
+            ax.plot([0, 1], [r['Pre_Score'], r['Post_Score']],
+                    color=MEDIUM_COLORS[r['Medium']], alpha=0.5, lw=1.5,
+                    marker='o', markersize=5)
+            ax.annotate(r['Participant'], (1.02, r['Post_Score']),
+                        fontsize=7, va='center', alpha=0.6)
+
+        # Thick mean line per medium; these are the only labelled artists,
+        # so they are what the panel legend lists (with the mean change).
+        for medium in MEDIUM_ORDER:
+            msub = sub[sub['Medium'] == medium]
+            if len(msub) > 0:
+                pm, qm = msub['Pre_Score'].mean(), msub['Post_Score'].mean()
+                ax.plot([0, 1], [pm, qm],
+                        color=MEDIUM_COLORS[medium], lw=3.5, marker='D', markersize=10,
+                        zorder=10, markeredgecolor='white', markeredgewidth=2,
+                        label=f'{medium} ({qm-pm:+.1f}%)')
+
+        # Overall statistics across all media for this topic.
+        if len(sub) >= 2:
+            g, sd, sem, d, t, p, n = compute_effect(sub)
+            star = sig_stars(p)
+            ax.text(0.5, 0.03,
+                    f'Overall: {g:+.1f}%  d={d:.2f}\nt={t:.2f}  p={p:.3f} {star}',
+                    transform=ax.transAxes, ha='center', fontsize=9,
+                    bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))
+
+        ax.set_xticks([0, 1])
+        ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=11)
+        ax.set_title(topic, fontsize=14, fontweight='bold', color=TOPIC_COLORS[topic])
+        ax.set_ylim(-5, 110)
+        # NOTE(review): legend() is called even when no mean line was drawn
+        # for this topic (no labelled artists) — confirm that is acceptable.
+        ax.legend(fontsize=9, loc='upper left')
+
+    axes[0].set_ylabel('Test Score (%)', fontsize=12)
+    fig.suptitle('G2 – Paired Slopes by Topic (Medium-Colored Lines)',
+                 fontsize=14, fontweight='bold')
+    # Reserve the top 4% of the figure for the suptitle.
+    fig.tight_layout(rect=[0, 0, 1, 0.96])
+    fig.savefig(PLOT_DIR / 'G2_slopes_per_topic.png')
+    plt.close(fig)
+
+
+# =============================================================================
+# H. ALL MEDIUM × TOPIC COMBINATIONS (3×3 GRID)
+# =============================================================================
+
+def plot_H1_medium_topic_grid(paired):
+    """Draw a 3×3 grid of pre/post slope plots (rows = media, cols = topics).
+
+    Each cell shows the individual pairs of one medium × topic combination.
+    Cells with at least two pairs also show the mean slope and a stats box;
+    cells with exactly one pair are marked 'n=1 (no stats)'; empty cells
+    stay blank.  Saves ``H1_medium_topic_grid.png`` in PLOT_DIR.
+    """
+    fig, axes = plt.subplots(3, 3, figsize=(18, 16), sharey=True)
+
+    for row_idx, medium in enumerate(MEDIUM_ORDER):
+        for col_idx, topic in enumerate(TOPIC_ORDER):
+            ax = axes[row_idx][col_idx]
+            sub = paired[(paired['Medium'] == medium) &
+                         (paired['Topic'] == topic)].sort_values('P_Num')
+
+            # Individual pairs, coloured by topic and labelled by participant.
+            for _, r in sub.iterrows():
+                ax.plot([0, 1], [r['Pre_Score'], r['Post_Score']],
+                        color=TOPIC_COLORS[topic], alpha=0.55, lw=1.5,
+                        marker='o', markersize=5)
+                ax.annotate(r['Participant'], (1.02, r['Post_Score']),
+                            fontsize=7, va='center', alpha=0.6)
+
+            if len(sub) >= 2:
+                # Mean slope in the medium colour, drawn above the thin lines.
+                pre_m, post_m = sub['Pre_Score'].mean(), sub['Post_Score'].mean()
+                ax.plot([0, 1], [pre_m, post_m],
+                        color=MEDIUM_COLORS[medium], lw=4, marker='D', markersize=11,
+                        zorder=10, markeredgecolor='white', markeredgewidth=2)
+
+                g, sd, sem, d, t, p, n = compute_effect(sub)
+                star = sig_stars(p)
+                ax.text(0.5, 0.03,
+                        f'n={n}  {g:+.1f}%\nd={d:.2f}  p={p:.3f} {star}',
+                        transform=ax.transAxes, ha='center', fontsize=8.5,
+                        bbox=dict(boxstyle='round,pad=0.3', facecolor='lightyellow', alpha=0.9))
+            elif len(sub) == 1:
+                # A single pair: emphasise it in the medium colour, but no
+                # statistics can be computed.
+                r = sub.iloc[0]
+                pre_m, post_m = r['Pre_Score'], r['Post_Score']
+                ax.plot([0, 1], [pre_m, post_m],
+                        color=MEDIUM_COLORS[medium], lw=3, marker='D', markersize=10,
+                        zorder=10, markeredgecolor='white', markeredgewidth=2)
+                ax.text(0.5, 0.03, 'n=1 (no stats)', transform=ax.transAxes,
+                        ha='center', fontsize=8.5, color='gray')
+
+            ax.set_xticks([0, 1])
+            ax.set_xticklabels(['Pre', 'Post'], fontsize=9)
+            ax.set_ylim(-5, 110)
+
+            # Row labels on the first column, column titles on the first row.
+            if col_idx == 0:
+                ax.set_ylabel(f'{medium}\nScore (%)', fontsize=10, fontweight='bold',
+                              color=MEDIUM_COLORS[medium])
+            if row_idx == 0:
+                ax.set_title(topic, fontsize=12, fontweight='bold',
+                             color=TOPIC_COLORS[topic])
+
+    fig.suptitle('H1 – Tutoring Slopes: All Medium × Topic Combinations',
+                 fontsize=15, fontweight='bold')
+    # Keep a 3% margin at the top (suptitle) and at the right edge.
+    fig.tight_layout(rect=[0, 0, 0.97, 0.97])
+    fig.savefig(PLOT_DIR / 'H1_medium_topic_grid.png')
+    plt.close(fig)
+
+
+# =============================================================================
+# I. OUTLIER INFLUENCE ANALYSIS
+# =============================================================================
+
+def _flag_outliers(paired):
+    """Add 'Outlier' bool column based on IQR rule applied per medium."""
+    paired = paired.copy()
+    paired['Outlier'] = False
+    for m in MEDIUM_ORDER:
+        mask = paired['Medium'] == m
+        paired.loc[mask, 'Outlier'] = iqr_outlier_mask(paired.loc[mask, 'Score_Gain']).values
+    return paired
+
+
+def plot_I1_outlier_scatter(paired):
+    """Draw a jittered strip of score gains per medium with outliers marked.
+
+    One panel per medium.  Dashed red lines show the 1.5×IQR fences;
+    points flagged by _flag_outliers are drawn larger, in red, and
+    annotated with participant and (truncated) topic.  Saves
+    ``I1_outlier_scatter.png`` in PLOT_DIR.
+    """
+    paired_f = _flag_outliers(paired)
+    fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)
+
+    for col_idx, medium in enumerate(MEDIUM_ORDER):
+        ax = axes[col_idx]
+        sub = paired_f[paired_f['Medium'] == medium]
+
+        # Recompute the fences for display; same 1.5×IQR rule as
+        # iqr_outlier_mask, applied to this medium's gains.
+        q1  = sub['Score_Gain'].quantile(0.25)
+        q3  = sub['Score_Gain'].quantile(0.75)
+        iqr = q3 - q1
+        lo  = q1 - 1.5 * iqr
+        hi  = q3 + 1.5 * iqr
+
+        # Only the upper fence carries a label, so the legend has one entry.
+        ax.axhline(hi, color='#E53935', lw=1.5, ls='--', alpha=0.7, label=f'±1.5 IQR ({lo:.1f}–{hi:.1f})')
+        ax.axhline(lo, color='#E53935', lw=1.5, ls='--', alpha=0.7)
+        ax.axhline(0,  color='gray', lw=1, alpha=0.5)
+
+        # The generator is re-seeded for every panel, so the jitter is
+        # reproducible and each panel draws from the same sequence.
+        rng = np.random.default_rng(42)
+        for _, r in sub.iterrows():
+            jit = rng.uniform(-0.12, 0.12)
+            color = '#E53935' if r['Outlier'] else MEDIUM_COLORS[medium]
+            ms    = 10 if r['Outlier'] else 7
+            ax.scatter(0.5 + jit, r['Score_Gain'], color=color, s=ms**2,
+                       alpha=0.8, edgecolors='white', lw=0.5, zorder=5)
+            if r['Outlier']:
+                # Label with participant id and the first 6 characters of
+                # the topic; the label is offset away from the centre line.
+                lbl = f"{r['Participant']}\n({r['Topic'][:6]})"
+                ax.annotate(lbl, (0.5 + jit, r['Score_Gain']),
+                            fontsize=7.5, ha='center',
+                            xytext=(20 if jit > 0 else -20, 0),
+                            textcoords='offset points',
+                            arrowprops=dict(arrowstyle='->', color='#E53935', lw=0.8),
+                            color='#E53935', fontweight='bold')
+
+        n_out = sub['Outlier'].sum()
+        ax.set_xlim(0, 1)
+        ax.set_xticks([0.5])
+        ax.set_xticklabels([medium], fontsize=12)
+        ax.set_title(f'{medium}\n({n_out} outlier{"s" if n_out != 1 else ""})',
+                     fontsize=12, fontweight='bold', color=MEDIUM_COLORS[medium])
+        if col_idx == 0:
+            ax.set_ylabel('Score Gain (%)', fontsize=12)
+        ax.legend(fontsize=8, loc='upper right')
+
+    fig.suptitle('I1 – Score Gain Scatter with Outlier Flags (IQR Method)',
+                 fontsize=14, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'I1_outlier_scatter.png')
+    plt.close(fig)
+
+
+def plot_I2_outlier_effect_comparison(paired):
+    """Compare Cohen's d per medium with and without IQR outliers.
+
+    Left panel: grouped bars of d for all data vs. outliers removed.
+    Right panel: the difference (outliers removed − all) per medium, green
+    when non-negative and red when negative.  Saves
+    ``I2_outlier_effect_comparison.png`` in PLOT_DIR.
+    """
+    paired_f = _flag_outliers(paired)
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
+
+    x = np.arange(len(MEDIUM_ORDER))
+    w = 0.35
+    # Parallel lists, one entry per medium in MEDIUM_ORDER.
+    all_ds, no_out_ds = [], []
+    all_gs, no_out_gs = [], []
+    all_ps, no_out_ps = [], []
+    all_ns, no_out_ns = [], []
+
+    for m in MEDIUM_ORDER:
+        sub_all = paired_f[paired_f['Medium'] == m]
+        sub_noo = sub_all[~sub_all['Outlier']]
+        g1, _, _, d1, t1, p1, n1 = compute_effect(sub_all)
+        g2, _, _, d2, t2, p2, n2 = compute_effect(sub_noo)
+        # An undefined d is plotted as 0; note the delta below is computed
+        # from these substituted values.
+        all_ds.append(d1 if not np.isnan(d1) else 0)
+        no_out_ds.append(d2 if not np.isnan(d2) else 0)
+        all_gs.append(g1); no_out_gs.append(g2)
+        all_ps.append(p1); no_out_ps.append(p2)
+        all_ns.append(n1); no_out_ns.append(n2)
+
+    # Left panel: solid bars = all data, hatched lighter bars = outliers removed.
+    for j, (label, ds, gs, ps, ns, alpha, hatch) in enumerate([
+        ('All Data',           all_ds,    all_gs,    all_ps,    all_ns,    0.80, ''),
+        ('Outliers Removed',   no_out_ds, no_out_gs, no_out_ps, no_out_ns, 0.45, '//'),
+    ]):
+        bars = ax1.bar(x + j*w - w/2, ds, w, label=label,
+                       color=[MEDIUM_COLORS[m] for m in MEDIUM_ORDER],
+                       alpha=alpha, hatch=hatch, edgecolor='white', lw=1.2)
+        for b, d_val, g, p, n in zip(bars, ds, gs, ps, ns):
+            # No significance marker when p is undefined.
+            star = sig_stars(p) if not np.isnan(p) else ''
+            ax1.text(b.get_x() + b.get_width()/2,
+                     max(d_val, 0) + 0.03,
+                     f'd={d_val:.2f}\n{g:+.1f}%\n{star}',
+                     ha='center', va='bottom', fontsize=8.5, fontweight='bold')
+
+    # Reference lines for the small / medium / large effect-size bands.
+    for thresh, lbl, ls in [(0.2,'small',':'), (0.5,'medium','--'), (0.8,'large','-')]:
+        ax1.axhline(thresh, color='gray', lw=1, ls=ls, alpha=0.5)
+        ax1.text(2.65, thresh + 0.02, lbl, fontsize=8, color='gray')
+    ax1.set_xticks(x); ax1.set_xticklabels(MEDIUM_ORDER, fontsize=12)
+    # The y-axis starts at 0, so a negative d falls below the visible range;
+    # its text label is still placed just above the baseline.
+    ax1.set_ylim(bottom=0)
+    ax1.set_ylabel("Cohen's d", fontsize=12)
+    ax1.set_title("Cohen's d: All Data vs. Outliers Removed", fontsize=12, fontweight='bold')
+    ax1.legend(fontsize=10)
+
+    # Right panel: change in d after removing outliers.
+    delta_d = [no - al for al, no in zip(all_ds, no_out_ds)]
+    colors_d = ['#43A047' if dd >= 0 else '#E53935' for dd in delta_d]
+    bars2 = ax2.bar(MEDIUM_ORDER, delta_d, color=colors_d, alpha=0.8, edgecolor='white', lw=1.5)
+    for b, dd in zip(bars2, delta_d):
+        # Place the label outside the bar: above for positive, below for negative.
+        ax2.text(b.get_x() + b.get_width()/2,
+                 dd + (0.01 if dd >= 0 else -0.03),
+                 f'Δd={dd:+.3f}',
+                 ha='center', va='bottom' if dd >= 0 else 'top',
+                 fontsize=10, fontweight='bold')
+    ax2.axhline(0, color='gray', lw=1)
+    ax2.set_ylabel('Δ Cohen\'s d (Outliers Removed − All)', fontsize=12)
+    ax2.set_title('Change in Effect Size After Removing Outliers', fontsize=12, fontweight='bold')
+
+    fig.suptitle('I2 – Outlier Influence on Effect Sizes', fontsize=14, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'I2_outlier_effect_comparison.png')
+    plt.close(fig)
+
+
+def plot_I3_outlier_heatmap(paired):
+    """Heatmap: which participant×topic pairs are outliers per medium."""
+    paired_f = _flag_outliers(paired)
+
+    fig, axes = plt.subplots(1, 3, figsize=(18, 8))
+
+    for col_idx, medium in enumerate(MEDIUM_ORDER):
+        ax = axes[col_idx]
+        sub = paired_f[paired_f['Medium'] == medium].copy()
+        sub['Label'] = sub['Participant'] + '\n' + sub['Topic'].str[:8]
+
+        # Build pivot: rows = participants sorted, cols = topics
+        pivot = sub.pivot_table(index='Participant', columns='Topic',
+                                values='Score_Gain', aggfunc='first')
+        pivot = pivot.reindex(columns=TOPIC_ORDER)
+        pivot = pivot.reindex(sorted(pivot.index, key=lambda x: int(x[1:])))
+
+        outlier_pivot = sub.pivot_table(index='Participant', columns='Topic',
+                                        values='Outlier', aggfunc='first')
+        outlier_pivot = outlier_pivot.reindex(columns=TOPIC_ORDER)
+        outlier_pivot = outlier_pivot.reindex(sorted(outlier_pivot.index,
+                                                      key=lambda x: int(x[1:])))
+
+        # Draw heatmap of score gain
+        sns.heatmap(pivot.astype(float), annot=True, fmt='.1f',
+                    cmap='RdYlGn', center=0, vmin=-40, vmax=60,
+                    linewidths=0.8, ax=ax, cbar_kws={'label': 'Score Gain %'},
+                    annot_kws={'size': 9})
+
+        # Overlay red border for outliers
+        for r_i, pid in enumerate(pivot.index):
+            for c_i, topic in enumerate(TOPIC_ORDER):
+                is_out = outlier_pivot.loc[pid, topic] if (pid in outlier_pivot.index and
+                         topic in outlier_pivot.columns) else False
+                if is_out:
+                    ax.add_patch(mpatches.Rectangle(
+                        (c_i, r_i), 1, 1,
+                        fill=False, edgecolor='#E53935', lw=3, zorder=5))
+
+        ax.set_title(f'{medium}', fontsize=13, fontweight='bold',
+                     color=MEDIUM_COLORS[medium])
+        ax.set_xlabel('Topic', fontsize=10)
+        ax.set_ylabel('Participant' if col_idx == 0 else '', fontsize=10)
+
+    fig.suptitle('I3 – Outlier Heatmap: Score Gain by Participant × Topic\n'
+                 '(Red border = IQR outlier within that medium)',
+                 fontsize=13, fontweight='bold')
+    fig.tight_layout()
+    fig.savefig(PLOT_DIR / 'I3_outlier_heatmap.png')
+    plt.close(fig)
+
+
+# =============================================================================
+# STATS EXPORT
+# =============================================================================
+
+def export_stats(paired):
+    paired_f = _flag_outliers(paired)
+
+    # --- F: effects by medium with/without Ökologie ---
+    rows_f = []
+    for m in MEDIUM_ORDER:
+        for cond_label, filter_fn in [
+            ('All Topics',     lambda sub: sub),
+            ('Excl_Oekologie', lambda sub: sub[sub['Topic'].isin(TOPICS_NO_OEK)]),
+        ]:
+            sub = filter_fn(paired[paired['Medium'] == m])
+            g, sd, sem, d, t, p, n = compute_effect(sub)
+            rows_f.append({
+                'Medium': m, 'Condition': cond_label, 'N': n,
+                'Mean_Gain': round(g, 3) if not np.isnan(g) else np.nan,
+                'SD_Gain': round(sd, 3) if not np.isnan(sd) else np.nan,
+                'Cohens_d': round(d, 3) if not np.isnan(d) else np.nan,
+                't_stat': round(t, 3) if not np.isnan(t) else np.nan,
+                'p_value': round(p, 4) if not np.isnan(p) else np.nan,
+            })
+    pd.DataFrame(rows_f).to_csv(
+        STATS_DIR / 'effects_by_medium_with_without_oekologie.csv', index=False)
+
+    # --- G: effects by topic ---
+    rows_g = []
+    for topic in TOPIC_ORDER:
+        sub = paired[paired['Topic'] == topic]
+        g, sd, sem, d, t, p, n = compute_effect(sub)
+        rows_g.append({
+            'Topic': topic, 'N': n,
+            'Mean_Gain': round(g, 3),
+            'SD_Gain': round(sd, 3),
+            'Cohens_d': round(d, 3) if not np.isnan(d) else np.nan,
+            't_stat': round(t, 3) if not np.isnan(t) else np.nan,
+            'p_value': round(p, 4) if not np.isnan(p) else np.nan,
+        })
+    pd.DataFrame(rows_g).to_csv(STATS_DIR / 'effects_by_topic.csv', index=False)
+
+    # --- H: effects by medium × topic ---
+    rows_h = []
+    for m in MEDIUM_ORDER:
+        for topic in TOPIC_ORDER:
+            sub = paired[(paired['Medium'] == m) & (paired['Topic'] == topic)]
+            g, sd, sem, d, t, p, n = compute_effect(sub)
+            rows_h.append({
+                'Medium': m, 'Topic': topic, 'N': n,
+                'Mean_Gain': round(g, 3) if not np.isnan(g) else np.nan,
+                'SD_Gain': round(sd, 3) if not np.isnan(sd) else np.nan,
+                'Cohens_d': round(d, 3) if not np.isnan(d) else np.nan,
+                't_stat': round(t, 3) if not np.isnan(t) else np.nan,
+                'p_value': round(p, 4) if not np.isnan(p) else np.nan,
+            })
+    pd.DataFrame(rows_h).to_csv(STATS_DIR / 'effects_by_medium_topic_grid.csv', index=False)
+
+    # --- I: outlier influence ---
+    rows_i = []
+    for m in MEDIUM_ORDER:
+        sub_all = paired_f[paired_f['Medium'] == m]
+        sub_noo = sub_all[~sub_all['Outlier']]
+        g1, sd1, _, d1, t1, p1, n1 = compute_effect(sub_all)
+        g2, sd2, _, d2, t2, p2, n2 = compute_effect(sub_noo)
+        outliers = sub_all[sub_all['Outlier']][['Participant', 'Topic', 'Score_Gain']]
+        out_list = '; '.join(f"{r['Participant']}/{r['Topic']}({r['Score_Gain']:+.1f}%)"
+                             for _, r in outliers.iterrows())
+        rows_i.append({
+            'Medium': m,
+            'N_all': n1, 'Mean_Gain_all': round(g1, 3), 'Cohens_d_all': round(d1, 3) if not np.isnan(d1) else np.nan,
+            'p_all': round(p1, 4) if not np.isnan(p1) else np.nan,
+            'N_no_outliers': n2, 'Mean_Gain_no_outliers': round(g2, 3),
+            'Cohens_d_no_outliers': round(d2, 3) if not np.isnan(d2) else np.nan,
+            'p_no_outliers': round(p2, 4) if not np.isnan(p2) else np.nan,
+            'Delta_d': round((d2 - d1) if not (np.isnan(d1) or np.isnan(d2)) else np.nan, 3),
+            'Outliers': out_list,
+        })
+    pd.DataFrame(rows_i).to_csv(STATS_DIR / 'outlier_influence.csv', index=False)
+
+    print(f"  Stats exported to: {STATS_DIR}")
+
+
+# =============================================================================
+# MAIN
+# =============================================================================
+
+def main():
+    print("Loading data...")
+    df     = load_data()
+    paired = build_paired_tutoring(df)
+    print(f"  {len(paired)} paired tutoring entries across "
+          f"{paired['Participant'].nunique()} participants\n")
+
+    sections = [
+        ("F. Effect Without Ökologie (vs. With)", [
+            ("F1", "Cohen's d comparison by medium",          lambda: plot_F1_cohens_d_comparison(paired)),
+            ("F2", "Mean score gain comparison",              lambda: plot_F2_mean_gain_comparison(paired)),
+            ("F3", "Paired slopes 2×3 grid",                  lambda: plot_F3_paired_slopes_comparison(paired)),
+            ("F4", "Gain distribution comparison",            lambda: plot_F4_gain_distribution_comparison(paired)),
+            ("F5", "Descriptive stats table",                 lambda: plot_F5_stats_table(paired)),
+        ]),
+        ("G. Effect Per Topic", [
+            ("G1", "Bar chart: gain + d per topic",           lambda: plot_G1_effect_per_topic(paired)),
+            ("G2", "Paired slopes per topic",                 lambda: plot_G2_slopes_per_topic(paired)),
+        ]),
+        ("H. Medium × Topic Grid", [
+            ("H1", "3×3 grid of slope plots",                 lambda: plot_H1_medium_topic_grid(paired)),
+        ]),
+        ("I. Outlier Influence Analysis", [
+            ("I1", "Outlier scatter per medium",              lambda: plot_I1_outlier_scatter(paired)),
+            ("I2", "Effect size: all vs. outliers removed",   lambda: plot_I2_outlier_effect_comparison(paired)),
+            ("I3", "Outlier heatmap (participant × topic)",   lambda: plot_I3_outlier_heatmap(paired)),
+        ]),
+    ]
+
+    for section_name, plots in sections:
+        print(section_name)
+        for code, desc, fn in plots:
+            fn()
+            print(f"  [{code}] {desc}")
+
+    print(f"\n11 plots saved to: {PLOT_DIR}")
+
+    print("\nExporting statistics...")
+    export_stats(paired)
+    print("Done.")
+
+
+# Run the pipeline only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()