added comparison without ökologie

This commit is contained in:
2026-02-28 16:12:09 +01:00
parent 9e50db2da6
commit b1039e6a7f
48 changed files with 977 additions and 2 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 87 KiB

After

Width:  |  Height:  |  Size: 93 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 129 KiB

After

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 146 KiB

After

Width:  |  Height:  |  Size: 154 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 206 KiB

After

Width:  |  Height:  |  Size: 208 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 240 KiB

After

Width:  |  Height:  |  Size: 255 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 257 KiB

After

Width:  |  Height:  |  Size: 270 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 92 KiB

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 85 KiB

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 164 KiB

After

Width:  |  Height:  |  Size: 191 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 315 KiB

After

Width:  |  Height:  |  Size: 323 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 110 KiB

After

Width:  |  Height:  |  Size: 126 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 215 KiB

After

Width:  |  Height:  |  Size: 230 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 151 KiB

After

Width:  |  Height:  |  Size: 158 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 130 KiB

After

Width:  |  Height:  |  Size: 139 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 102 KiB

After

Width:  |  Height:  |  Size: 114 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 126 KiB

After

Width:  |  Height:  |  Size: 133 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 420 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 144 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 277 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 344 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 131 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 146 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 56 KiB

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 73 KiB

After

Width:  |  Height:  |  Size: 73 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 124 KiB

After

Width:  |  Height:  |  Size: 127 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 59 KiB

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 64 KiB

After

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 88 KiB

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 144 KiB

After

Width:  |  Height:  |  Size: 146 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 233 KiB

After

Width:  |  Height:  |  Size: 228 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 102 KiB

After

Width:  |  Height:  |  Size: 97 KiB

View File

@ -0,0 +1,10 @@
Medium,Topic,N,Mean_Gain,SD_Gain,Cohens_d,t_stat,p_value
Chat,Mendel,6,14.467,11.5,1.258,-3.082,0.0274
Chat,DNA-Replikation,6,22.217,20.952,1.06,-2.597,0.0484
Chat,Ökologie,6,-3.35,3.67,-0.913,2.236,0.0756
Video,Mendel,6,4.417,3.421,1.291,-3.162,0.025
Video,DNA-Replikation,6,16.667,32.092,0.519,-1.272,0.2593
Video,Ökologie,6,-0.017,8.443,-0.002,0.005,0.9963
VR,Mendel,6,14.45,12.959,1.115,-2.731,0.0412
VR,DNA-Replikation,6,11.117,13.134,0.846,-2.073,0.0929
VR,Ökologie,6,15.55,36.21,0.429,-1.052,0.341
1 Medium Topic N Mean_Gain SD_Gain Cohens_d t_stat p_value
2 Chat Mendel 6 14.467 11.5 1.258 -3.082 0.0274
3 Chat DNA-Replikation 6 22.217 20.952 1.06 -2.597 0.0484
4 Chat Ökologie 6 -3.35 3.67 -0.913 2.236 0.0756
5 Video Mendel 6 4.417 3.421 1.291 -3.162 0.025
6 Video DNA-Replikation 6 16.667 32.092 0.519 -1.272 0.2593
7 Video Ökologie 6 -0.017 8.443 -0.002 0.005 0.9963
8 VR Mendel 6 14.45 12.959 1.115 -2.731 0.0412
9 VR DNA-Replikation 6 11.117 13.134 0.846 -2.073 0.0929
10 VR Ökologie 6 15.55 36.21 0.429 -1.052 0.341

View File

@ -0,0 +1,7 @@
Medium,Condition,N,Mean_Gain,SD_Gain,Cohens_d,t_stat,p_value
Chat,All Topics,18,11.111,17.125,0.649,-2.753,0.0136
Chat,Excl_Oekologie,12,18.342,16.614,1.104,-3.824,0.0028
Video,All Topics,18,7.022,19.494,0.36,-1.528,0.1448
Video,Excl_Oekologie,12,10.542,22.68,0.465,-1.61,0.1357
VR,All Topics,18,13.706,22.125,0.619,-2.628,0.0176
VR,Excl_Oekologie,12,12.783,12.561,1.018,-3.525,0.0048
1 Medium Condition N Mean_Gain SD_Gain Cohens_d t_stat p_value
2 Chat All Topics 18 11.111 17.125 0.649 -2.753 0.0136
3 Chat Excl_Oekologie 12 18.342 16.614 1.104 -3.824 0.0028
4 Video All Topics 18 7.022 19.494 0.36 -1.528 0.1448
5 Video Excl_Oekologie 12 10.542 22.68 0.465 -1.61 0.1357
6 VR All Topics 18 13.706 22.125 0.619 -2.628 0.0176
7 VR Excl_Oekologie 12 12.783 12.561 1.018 -3.525 0.0048

View File

@ -0,0 +1,4 @@
Topic,N,Mean_Gain,SD_Gain,Cohens_d,t_stat,p_value
Mendel,18,11.111,10.745,1.034,-4.387,0.0004
DNA-Replikation,18,16.667,22.461,0.742,-3.148,0.0059
Ökologie,18,4.061,21.964,0.185,-0.784,0.4436
1 Topic N Mean_Gain SD_Gain Cohens_d t_stat p_value
2 Mendel 18 11.111 10.745 1.034 -4.387 0.0004
3 DNA-Replikation 18 16.667 22.461 0.742 -3.148 0.0059
4 Ökologie 18 4.061 21.964 0.185 -0.784 0.4436

View File

@ -0,0 +1,4 @@
Medium,N_all,Mean_Gain_all,Cohens_d_all,p_all,N_no_outliers,Mean_Gain_no_outliers,Cohens_d_no_outliers,p_no_outliers,Delta_d,Outliers
Chat,18,11.111,0.649,0.0136,17,8.235,0.665,0.0145,0.016,P8/DNA-Replikation(+60.0%)
Video,18,7.022,0.36,0.1448,16,1.225,0.166,0.5178,-0.195,P3/DNA-Replikation(+33.4%); P10/DNA-Replikation(+73.4%)
VR,18,13.706,0.619,0.0176,17,9.412,0.727,0.0085,0.108,P3/Ökologie(+86.7%)
1 Medium N_all Mean_Gain_all Cohens_d_all p_all N_no_outliers Mean_Gain_no_outliers Cohens_d_no_outliers p_no_outliers Delta_d Outliers
2 Chat 18 11.111 0.649 0.0136 17 8.235 0.665 0.0145 0.016 P8/DNA-Replikation(+60.0%)
3 Video 18 7.022 0.36 0.1448 16 1.225 0.166 0.5178 -0.195 P3/DNA-Replikation(+33.4%); P10/DNA-Replikation(+73.4%)
4 VR 18 13.706 0.619 0.0176 17 9.412 0.727 0.0085 0.108 P3/Ökologie(+86.7%)

View File

@ -286,6 +286,101 @@ Scatter plots of each Big Five trait against tutoring score gain, with regressio
---
## G. Effect Analysis
> Generated by `generate_plots_effects.py` → `Data/plots_effects/`
> Statistical exports → `Data/stats/effects_*.csv`, `Data/stats/outlier_influence.csv`
---
### G-F. Effect Without Ökologie (vs. With)
Ökologie has markedly higher pre-tutoring baselines (ceiling effects), which compresses gains for that topic. This section quantifies how much those ceiling effects suppress the observed effect sizes, and presents a full side-by-side comparison of all mediums with and without Ökologie included.
#### GF1 Cohen's d Comparison by Medium
![Cohen's d comparison](Data/plots_effects/F1_cohens_d_comparison.png)
Grouped bar chart of Cohen's d per medium under two conditions: All Topics and Excl. Ökologie. Each bar is annotated with the raw mean gain and significance stars. Reference lines mark the conventional small (0.2), medium (0.5), and large (0.8) effect size thresholds. Removing Ökologie raises the effect size for every medium, with Chat showing the largest absolute shift (d = 0.649 → 1.104), followed by VR (0.619 → 1.018) and Video (0.360 → 0.465).
#### GF2 Mean Score Gain Comparison
![Mean score gain comparison](Data/plots_effects/F2_mean_gain_comparison.png)
95% CI bar chart of the raw mean tutoring score gain per medium, both conditions overlaid. Shows the absolute gain shift when Ökologie is excluded. Chat benefits most from exclusion (+11.1 → +18.3), Video moderately (+7.0 → +10.5), while VR's mean gain is essentially unchanged (+13.7 → +12.8): VR's Ökologie mean (+15.6, SD 36.2) is dominated by a single extreme gain (P3, +86.7), so dropping the topic does not raise the VR average.
#### GF3 Paired Slopes: All Topics vs. Excl. Ökologie
![Paired slopes comparison](Data/plots_effects/F3_paired_slopes_comparison.png)
A 2×3 grid (rows: All Topics / Excl. Ökologie; columns: Chat / Video / VR). Each panel shows individual Pre→Post-Tutoring lines colored by topic, the medium mean trajectory (thick line with diamond markers), and annotated t-test / Cohen's d / p-value. The bottom row directly reveals the cleaner separation in trajectories once the near-zero Ökologie gains are removed.
#### GF4 Gain Distribution Comparison
![Gain distribution comparison](Data/plots_effects/F4_gain_distribution_comparison.png)
Side-by-side violin + box plots per medium, two per medium (All Topics / Excl. Ökologie). Shows the shift in median, spread, and the location of extreme values. For VR in particular, removing Ökologie tightens the distribution and raises the median, confirming Ökologie's pull toward zero.
#### GF5 Descriptive Statistics Table
![Stats table](Data/plots_effects/F5_stats_table.png)
Rendered table summarizing N, mean gain, SD, Cohen's d, t-statistic, and p-value for all 6 conditions (3 mediums × 2 topic sets) in one view.
---
### G-G. Effect Per Topic
Full effect-size breakdown for each of the three topics independently, across all mediums combined.
#### GG1 Effect Per Topic (Gain + Cohen's d)
![Effect per topic](Data/plots_effects/G1_effect_per_topic.png)
Left panel: mean tutoring score gain with 95% CI error bars per topic, annotated with N and significance. Right panel: Cohen's d per topic with threshold reference lines. DNA-Replikation yields the largest raw gain (+16.7; high starting deficit → large gain), but Mendel yields the largest standardized effect (d = 1.03 vs. 0.74) because its gains are more consistent across participants. Ökologie is smallest on both measures (+4.1, d = 0.19; ceiling effects).
#### GG2 Paired Slopes per Topic
![Paired slopes per topic](Data/plots_effects/G2_slopes_per_topic.png)
Three-panel slope plot (one per topic), with lines colored by medium. Medium mean trajectories are drawn as thick diamond markers and labeled with per-medium gains. The overall t-test / d / p annotation summarizes the within-topic effect. Ökologie clearly shows compressed trajectories compared to DNA-Replikation.
---
### G-H. All Medium × Topic Combinations
#### GH1 3×3 Slope Grid (Medium × Topic)
![3x3 slope grid](Data/plots_effects/H1_medium_topic_grid.png)
A 3×3 grid with rows = mediums (Chat, Video, VR) and columns = topics (Mendel, DNA-Replikation, Ökologie). Each of the 9 cells shows individual participant Pre→Post-Tutoring slope lines (colored by topic), the medium mean (thick line), and the annotated effect size (d, p, n). This is the most granular view: Chat × DNA-Replikation shows the largest mean gain (+22.2), while Chat × Ökologie (−3.4) and Video × Ökologie (≈0.0) show compressed or near-zero gains. The VR × Ökologie mean (+15.6) is driven by one extreme participant (SD 36.2) rather than a consistent improvement.
---
### G-I. Outlier Influence Analysis
Outliers are defined using the 1.5×IQR rule applied per medium on tutoring `Score_Gain`.
#### GI1 Score Gain Scatter with Outlier Flags
![Outlier scatter](Data/plots_effects/I1_outlier_scatter.png)
Jittered scatter of individual score gains per medium. IQR fence lines (±1.5×IQR) are marked in red. Outlier points are highlighted in red and labeled with participant ID and topic name. P3/Ökologie (VR, +86.7 pp) is the most extreme single data point.
#### GI2 Outlier Influence on Effect Sizes
![Outlier effect comparison](Data/plots_effects/I2_outlier_effect_comparison.png)
Left: grouped bar chart of Cohen's d with All Data vs. Outliers Removed, annotated with raw gains and significance. Right: Δd bar chart showing the change in effect size after outlier removal per medium. A positive Δd means the outlier(s) were suppressing the true effect; a negative Δd means they were inflating it.
#### GI3 Outlier Heatmap (Participant × Topic per Medium)
![Outlier heatmap](Data/plots_effects/I3_outlier_heatmap.png)
Heatmap of tutoring score gain for each participant × topic cell, one panel per medium. Color encodes gain magnitude (red → yellow → green). Cells with a red border are IQR outliers within that medium's distribution. Allows immediate identification of which participant-topic combinations drive extreme results.
---
## F. Questionnaire Analysis
> Questionnaires were administered at multiple phases: Pre-Reading, Post-Reading, Pre-Tutoring, and Post-Tutoring.

View File

@ -30,7 +30,7 @@ from scipy import stats
# =============================================================================
# CONFIG
# =============================================================================
BASE = Path(r"F:\GitHub Projekte\VirTu-Eval\Data")
BASE = Path(__file__).resolve().parent / "Data"
PLOT_DIR = BASE / "plots"
PLOT_DIR.mkdir(exist_ok=True)
STATS_DIR = BASE / "stats"

855
generate_plots_effects.py Normal file
View File

@ -0,0 +1,855 @@
"""
generate_plots_effects.py
Effect-focused analysis for VirTu-Eval experiment data.
Generates plots into Data/plots_effects/ organized by section:
F. Effect Without Ökologie (vs. With) — 5 plots
G. Effect Per Topic — 2 plots
H. All Medium × Topic Combinations — 1 plot (3×3 grid)
I. Outlier Influence Analysis — 3 plots
Usage:
python generate_plots_effects.py
"""
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import seaborn as sns
from pathlib import Path
from scipy import stats
# =============================================================================
# CONFIG
# =============================================================================
# Resolve the data directory relative to this script instead of the current
# working directory, so the script reads and writes the same Data/ folder no
# matter where it is launched from.
BASE = Path(__file__).resolve().parent / "Data"
PLOT_DIR = BASE / "plots_effects"
PLOT_DIR.mkdir(parents=True, exist_ok=True)
STATS_DIR = BASE / "stats"
# parents=True keeps this call independent of the PLOT_DIR creation above.
STATS_DIR.mkdir(parents=True, exist_ok=True)

# Test phases in chronological order, plus the short labels used on plot axes.
PHASE_ORDER = ['Pre-Reading', 'Post-Reading', 'Pre-Tutoring', 'Post-Tutoring']
PHASE_LABELS = ['Pre-Read', 'Post-Read', 'Pre-Tutor', 'Post-Tutor']
PHASE_SHORT = dict(zip(PHASE_ORDER, PHASE_LABELS))

# Fixed display order and colour for each tutoring medium and topic.
MEDIUM_ORDER = ['Chat', 'Video', 'VR']
MEDIUM_COLORS = {'Chat': '#2196F3', 'Video': '#FF9800', 'VR': '#4CAF50'}
TOPIC_ORDER = ['Mendel', 'DNA-Replikation', 'Ökologie']
TOPIC_COLORS = {'Mendel': '#E91E63', 'DNA-Replikation': '#9C27B0', 'Ökologie': '#009688'}
# Topic subset used for the "Excl. Ökologie" condition.
TOPICS_NO_OEK = ['Mendel', 'DNA-Replikation']

sns.set_theme(style="whitegrid", font_scale=1.05)
plt.rcParams['figure.dpi'] = 150
plt.rcParams['savefig.bbox'] = 'tight'
# =============================================================================
# HELPERS
# =============================================================================
def cohens_d(pre, post):
    """Return Cohen's d for paired samples.

    d is the mean of the paired differences (post - pre) divided by their
    sample standard deviation (ddof=1). When that standard deviation is not
    strictly positive (including when it is NaN), 0.0 is returned.
    """
    delta = post - pre
    spread = delta.std(ddof=1)
    if spread > 0:
        return delta.mean() / spread
    return 0.0
def sig_stars(p):
    """Map a p-value to a significance marker.

    Returns '***' for p < 0.001, '**' for p < 0.01, '*' for p < 0.05 and
    'n.s.' otherwise (which is also the result for NaN, since every
    comparison with NaN is false).
    """
    for cutoff, marker in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p < cutoff:
            return marker
    return 'n.s.'
def compute_effect(sub):
    """Return (mean_gain, sd_gain, sem_gain, d, t, p, n) for a paired subset.

    Parameters
    ----------
    sub : DataFrame with 'Pre_Score', 'Post_Score' and 'Score_Gain' columns.

    Returns
    -------
    tuple
        mean, sample SD and SEM of 'Score_Gain'; Cohen's d from cohens_d;
        the paired t statistic and its p-value; and the row count n.
        With fewer than two rows no test is run and sem, d, t and p are NaN.

    The t statistic is computed for post versus pre, so its sign matches the
    mean gain and Cohen's d (a positive gain gives a positive t). The p-value
    does not depend on the argument order.
    """
    pre = sub['Pre_Score']
    post = sub['Post_Score']
    gain = sub['Score_Gain']
    n = len(sub)
    if n < 2:
        # A paired test needs at least two pairs.
        return gain.mean(), gain.std(), np.nan, np.nan, np.nan, np.nan, n
    # Order (post, pre): the statistic is based on mean(post - pre), the same
    # direction as Score_Gain and cohens_d.
    t, p = stats.ttest_rel(post, pre)
    d = cohens_d(pre, post)
    return gain.mean(), gain.std(ddof=1), gain.sem(), d, t, p, n
def iqr_outlier_mask(series):
    """Return a boolean Series that is True where a value is an IQR outlier.

    A value counts as an outlier when it lies below Q1 - 1.5*IQR or above
    Q3 + 1.5*IQR, with the quartiles taken from `series` itself.
    """
    lower_q = series.quantile(0.25)
    upper_q = series.quantile(0.75)
    whisker = 1.5 * (upper_q - lower_q)
    too_low = series < lower_q - whisker
    too_high = series > upper_q + whisker
    return too_low | too_high
# =============================================================================
# DATA LOADING
# =============================================================================
def load_data():
    """Load BASE / 'test_scores_all.csv' and add helper columns.

    'Zeitpunkt' is whitespace-stripped and the value 'Pre-Tutor' is
    normalised to 'Pre-Tutoring'. 'Phase' is an ordered categorical over
    PHASE_ORDER, and 'P_Num' is the first run of digits found in
    'Participant', as an integer.
    """
    scores = pd.read_csv(BASE / "test_scores_all.csv", encoding="utf-8-sig")
    stripped = scores['Zeitpunkt'].str.strip()
    scores['Zeitpunkt'] = stripped.replace('Pre-Tutor', 'Pre-Tutoring')
    scores['Phase'] = pd.Categorical(
        scores['Zeitpunkt'], categories=PHASE_ORDER, ordered=True)
    digits = scores['Participant'].str.extract(r'(\d+)')
    scores['P_Num'] = digits.astype(int)
    return scores
def build_paired_tutoring(df):
    """Pair each Pre-Tutoring row with its Post-Tutoring counterpart.

    Rows are matched on (Participant, Topic, Medium) with an inner merge.
    The result carries Pre_/Post_ score and confidence columns, the
    differences 'Score_Gain' and 'Conf_Gain' (post minus pre), and 'P_Num',
    the first run of digits in 'Participant' as an integer.
    """
    keys = ['Participant', 'Topic', 'Medium']
    wanted = keys + ['Score_Pct', 'Avg_Confidence']

    before = df.loc[df['Zeitpunkt'] == 'Pre-Tutoring', wanted].rename(
        columns={'Score_Pct': 'Pre_Score', 'Avg_Confidence': 'Pre_Conf'})
    after = df.loc[df['Zeitpunkt'] == 'Post-Tutoring', wanted].rename(
        columns={'Score_Pct': 'Post_Score', 'Avg_Confidence': 'Post_Conf'})

    paired = before.merge(after, on=keys)
    paired['Score_Gain'] = paired['Post_Score'] - paired['Pre_Score']
    paired['Conf_Gain'] = paired['Post_Conf'] - paired['Pre_Conf']
    paired['P_Num'] = paired['Participant'].str.extract(r'(\d+)').astype(int)
    return paired
# =============================================================================
# F. EFFECT WITHOUT ÖKOLOGIE (vs. WITH)
# =============================================================================
def plot_F1_cohens_d_comparison(paired):
    """Bar chart: Cohen's d per medium — All Topics vs. Excl. Ökologie.

    Draws two bars per medium (all topics, and with the rows restricted to
    TOPICS_NO_OEK), labels each bar with d, the mean gain and significance
    stars, adds reference lines at d = 0.2 / 0.5 / 0.8, and saves the figure
    as PLOT_DIR / 'F1_cohens_d_comparison.png'.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(MEDIUM_ORDER))
    w = 0.35
    palette = [MEDIUM_COLORS[m] for m in MEDIUM_ORDER]

    variants = (
        ('All Topics', True, '', 0.85),
        ('Excl. Ökologie', False, '//', 0.45),
    )
    for j, (label, keep_all, hatch, opacity) in enumerate(variants):
        effects = []
        for medium in MEDIUM_ORDER:
            rows = paired[paired['Medium'] == medium]
            if not keep_all:
                rows = rows[rows['Topic'].isin(TOPICS_NO_OEK)]
            effects.append(compute_effect(rows))

        # compute_effect yields (mean, sd, sem, d, t, p, n). An undefined d is
        # drawn as a zero-height bar and an undefined p is treated as 1.
        gains = [e[0] for e in effects]
        d_vals = [e[3] if not np.isnan(e[3]) else 0 for e in effects]
        p_vals = [e[5] if not np.isnan(e[5]) else 1 for e in effects]

        bars = ax.bar(x + j*w - w/2, d_vals, w, label=label,
                      color=palette, alpha=opacity,
                      hatch=hatch, edgecolor='white', linewidth=1.2)
        for b, g, p, d_val in zip(bars, gains, p_vals, d_vals):
            star = sig_stars(p)
            ax.text(b.get_x() + b.get_width()/2,
                    max(d_val, 0) + 0.04,
                    f'd={d_val:.2f}\n{g:+.1f}%\n{star}',
                    ha='center', va='bottom', fontsize=8.5, fontweight='bold',
                    color='#333333')

    # Conventional effect-size thresholds, drawn first as lines ...
    for level, line_style, fade in ((0.2, ':', 0.6), (0.5, '--', 0.6), (0.8, '-', 0.4)):
        ax.axhline(level, color='gray', lw=1, ls=line_style, alpha=fade)
    # ... then captioned just right of the last bar group.
    for y_pos, caption in ((0.21, 'small'), (0.51, 'medium'), (0.81, 'large')):
        ax.text(2.65, y_pos, caption, fontsize=8, color='gray', va='bottom')

    ax.set_xticks(x)
    ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    ax.set_ylabel("Cohen's d (tutoring score gain)", fontsize=12)
    ax.set_ylim(bottom=0)
    ax.legend(fontsize=11)
    ax.set_title("F1 Effect Sizes by Medium: All Topics vs. Excl. Ökologie",
                 fontsize=13, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'F1_cohens_d_comparison.png')
    plt.close(fig)
def plot_F2_mean_gain_comparison(paired):
    """Bar chart with 95% CI: mean score gain per medium — All vs. Excl. Ökologie.

    For each medium two bars are drawn: all topics, and with the rows
    restricted to TOPICS_NO_OEK. Error bars are two-sided 95% confidence
    intervals of the mean gain based on the Student-t distribution with n-1
    degrees of freedom. The figure is saved as
    PLOT_DIR / 'F2_mean_gain_comparison.png'.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(MEDIUM_ORDER))
    w = 0.35
    for j, (label, use_all, alpha, hatch) in enumerate([
        ('All Topics', True, 0.80, ''),
        ('Excl. Ökologie', False, 0.45, '//'),
    ]):
        means, cis = [], []
        for m in MEDIUM_ORDER:
            sub_f = paired[paired['Medium'] == m]
            if not use_all:
                sub_f = sub_f[sub_f['Topic'].isin(TOPICS_NO_OEK)]
            g, _sd, sem, _d, _t, _p, n = compute_effect(sub_f)
            means.append(g)
            # With small groups the normal quantile 1.96 understates the
            # interval; use the t critical value for n-1 degrees of freedom.
            # Below two rows no interval exists (sem is NaN there as well).
            t_crit = stats.t.ppf(0.975, n - 1) if n >= 2 else np.nan
            cis.append(sem * t_crit)
        bars = ax.bar(x + j*w - w/2, means, w, label=label,
                      color=[MEDIUM_COLORS[m] for m in MEDIUM_ORDER],
                      alpha=alpha, hatch=hatch, edgecolor='white', linewidth=1.2,
                      yerr=cis, capsize=5, error_kw=dict(lw=1.5, capthick=1.5))
        for b, g in zip(bars, means):
            # Place the label slightly beyond the bar end (above for gains,
            # below for losses).
            ax.text(b.get_x() + b.get_width()/2,
                    g + (b.get_height() * 0.05 if g >= 0 else -2),
                    f'{g:+.1f}%',
                    ha='center', va='bottom', fontsize=9, fontweight='bold',
                    color='#333333')
    ax.axhline(0, color='gray', lw=1)
    ax.set_xticks(x)
    ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    ax.set_ylabel('Mean Score Gain (%, 95% CI)', fontsize=12)
    ax.legend(fontsize=11)
    ax.set_title('F2 Mean Score Gain by Medium: All Topics vs. Excl. Ökologie',
                 fontsize=13, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'F2_mean_gain_comparison.png')
    plt.close(fig)
def plot_F3_paired_slopes_comparison(paired):
    """2×3 grid: top row = All Topics, bottom row = Excl. Ökologie.

    Each panel (one per medium) shows one pre-to-post line per paired row,
    coloured by topic and labelled with the participant. When a panel has at
    least two rows, the medium mean trajectory and a stats annotation
    (n, gain, d, t, p) are added. The figure is saved as
    PLOT_DIR / 'F3_paired_slopes_comparison.png'.
    """
    panels = (
        ('All Topics', paired),
        ('Excl. Ökologie', paired[paired['Topic'].isin(TOPICS_NO_OEK)]),
    )
    fig, axes = plt.subplots(2, 3, figsize=(18, 12), sharey=True)

    for row, (cond_label, data) in enumerate(panels):
        for col_idx, medium in enumerate(MEDIUM_ORDER):
            ax = axes[row][col_idx]
            sub = data[data['Medium'] == medium].sort_values('P_Num')

            # One thin line per participant/topic pair.
            for who, topic, y_pre, y_post in zip(sub['Participant'], sub['Topic'],
                                                 sub['Pre_Score'], sub['Post_Score']):
                ax.plot([0, 1], [y_pre, y_post],
                        color=TOPIC_COLORS[topic], alpha=0.55, lw=1.5,
                        marker='o', markersize=5)
                ax.annotate(who, (1.02, y_post),
                            fontsize=7, va='center', alpha=0.6)

            if len(sub) >= 2:
                pre_m = sub['Pre_Score'].mean()
                post_m = sub['Post_Score'].mean()
                ax.plot([0, 1], [pre_m, post_m],
                        color=MEDIUM_COLORS[medium], lw=4, marker='D',
                        markersize=12, zorder=10,
                        markeredgecolor='white', markeredgewidth=2)
                g, sd, sem, d, t, p, n = compute_effect(sub)
                star = sig_stars(p)
                ax.text(0.5, 0.03,
                        f'n={n} Gain: {g:+.1f}%\nd={d:.2f} t={t:.2f} p={p:.3f} {star}',
                        transform=ax.transAxes, ha='center', fontsize=9,
                        bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))

            ax.set_xticks([0, 1])
            ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=10)
            ax.set_ylim(-5, 110)
            if col_idx == 0:
                ax.set_ylabel(f'{cond_label}\nTest Score (%)', fontsize=10, fontweight='bold')
            if row == 0:
                ax.set_title(medium, fontsize=13, fontweight='bold',
                             color=MEDIUM_COLORS[medium])

    # Shared legend: one entry per topic plus one for the mean trajectory.
    legend_els = []
    for t in TOPIC_ORDER:
        legend_els.append(
            Line2D([0], [0], color=TOPIC_COLORS[t], lw=2, marker='o', ms=6, label=t))
    legend_els.append(
        Line2D([0], [0], color='gray', lw=4, marker='D', ms=8, label='Medium Mean'))
    fig.legend(handles=legend_els, loc='lower center', ncol=4, fontsize=10,
               bbox_to_anchor=(0.5, 0.01))
    fig.suptitle('F3 Paired Slopes: All Topics (top) vs. Excl. Ökologie (bottom)',
                 fontsize=14, fontweight='bold')
    fig.tight_layout(rect=[0, 0.05, 1, 0.97])
    fig.savefig(PLOT_DIR / 'F3_paired_slopes_comparison.png')
    plt.close(fig)
def plot_F4_gain_distribution_comparison(paired):
    """Side-by-side violin+box plots per medium: All Topics vs. Excl. Ökologie.

    One panel per medium. Each panel holds up to two violin/box pairs of
    'Score_Gain' (all topics at x=0.8, TOPICS_NO_OEK only at x=2.2); a pair is
    skipped when it has fewer than two values. Each drawn pair is annotated
    with n, mean gain, d and significance. The figure is saved as
    PLOT_DIR / 'F4_gain_distribution_comparison.png'.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
    positions = [0.8, 2.2]

    for col_idx, medium in enumerate(MEDIUM_ORDER):
        ax = axes[col_idx]
        shade = MEDIUM_COLORS[medium]
        rows_all = paired[paired['Medium'] == medium]
        rows_core = paired[(paired['Medium'] == medium) &
                           (paired['Topic'].isin(TOPICS_NO_OEK))]

        for pos, rows, alpha in zip(positions, (rows_all, rows_core), (0.75, 0.40)):
            data = rows['Score_Gain'].values
            if len(data) < 2:
                continue

            # Violin for the shape of the distribution ...
            violin = ax.violinplot(data, positions=[pos], widths=0.9,
                                   showmedians=False, showextrema=False)
            for body in violin['bodies']:
                body.set_facecolor(shade)
                body.set_alpha(alpha)
            # ... overlaid with a box plot that also marks the mean (diamond).
            ax.boxplot(data, positions=[pos], widths=0.35,
                       patch_artist=True, showmeans=True, notch=False,
                       meanprops=dict(marker='D', markerfacecolor='black',
                                      markeredgecolor='white', markersize=7),
                       medianprops=dict(color='white', lw=2),
                       boxprops=dict(facecolor=shade, alpha=alpha))

            g, sd, sem, d, t, p, n = compute_effect(rows)
            star = sig_stars(p)
            ax.text(pos, np.nanmax(data) + 4,
                    f'n={n}\nM={g:+.1f}%\nd={d:.2f} {star}',
                    ha='center', va='bottom', fontsize=8.5, fontweight='bold')

        ax.axhline(0, color='gray', lw=1, ls='--', alpha=0.6)
        ax.set_xticks(positions)
        ax.set_xticklabels(['All\nTopics', 'Excl.\nÖkologie'], fontsize=10)
        ax.set_title(medium, fontsize=13, fontweight='bold', color=shade)
        if col_idx == 0:
            ax.set_ylabel('Score Gain (%)', fontsize=12)

    fig.suptitle('F4 Gain Distributions: All Topics vs. Excl. Ökologie',
                 fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'F4_gain_distribution_comparison.png')
    plt.close(fig)
def plot_F5_stats_table(paired):
    """Rendered table: N, mean gain, SD, d, t, p for each medium × condition.

    Builds one table row per (medium, condition) pair, where the conditions
    are all topics and TOPICS_NO_OEK only. d, t and p cells are left blank
    when the value is NaN. The table is saved as
    PLOT_DIR / 'F5_stats_table.png'.
    """
    fig, ax = plt.subplots(figsize=(14, 5))
    ax.axis('off')

    rows = []
    for medium in MEDIUM_ORDER:
        in_medium = paired['Medium'] == medium
        subsets = (
            ('All Topics', paired[in_medium]),
            ('Excl. Ökologie', paired[in_medium & paired['Topic'].isin(TOPICS_NO_OEK)]),
        )
        for cond_label, sub in subsets:
            g, sd, sem, d, t, p, n = compute_effect(sub)
            p_known = not np.isnan(p)
            star = sig_stars(p) if p_known else ''
            d_cell = f'{d:.3f}' if not np.isnan(d) else ''
            t_cell = f'{t:.3f}' if not np.isnan(t) else ''
            p_cell = f'{p:.3f}{star}' if p_known else ''
            rows.append([medium, cond_label, str(n),
                         f'{g:+.2f}', f'{sd:.2f}',
                         d_cell, t_cell, p_cell])

    col_labels = ['Medium', 'Condition', 'N', 'Mean Gain (%)', 'SD',
                  "Cohen's d", 't-stat', 'p-value']
    n_cols = len(col_labels)
    table = ax.table(cellText=rows, colLabels=col_labels,
                     loc='center', cellLoc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1.0, 2.0)

    # Header row: dark background, bold white text.
    for j in range(n_cols):
        header = table[0, j]
        header.set_facecolor('#37474F')
        header.set_text_props(color='white', fontweight='bold')

    # Body rows: tint by medium, lighter for the Excl. Ökologie condition.
    medium_col_idx = {'Chat': '#BBDEFB', 'Video': '#FFE0B2', 'VR': '#C8E6C9'}
    cond_row = {'All Topics': 0.85, 'Excl. Ökologie': 0.60}
    for table_row, (medium, cond, *_rest) in enumerate(rows, start=1):
        for j in range(n_cols):
            cell = table[table_row, j]
            cell.set_facecolor(medium_col_idx[medium])
            cell.set_alpha(cond_row[cond])

    ax.set_title('F5 Statistical Summary: All Topics vs. Excl. Ökologie',
                 fontsize=13, fontweight='bold', pad=30)
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'F5_stats_table.png')
    plt.close(fig)
# =============================================================================
# G. EFFECT PER TOPIC
# =============================================================================
def plot_G1_effect_per_topic(paired):
    """Bar chart: mean score gain + Cohen's d per topic, 95% CI.

    Left panel: mean 'Score_Gain' per topic with two-sided 95% confidence
    intervals based on the Student-t distribution (n-1 degrees of freedom),
    annotated with n and significance. Right panel: Cohen's d per topic with
    reference lines at 0.2 / 0.5 / 0.8. The figure is saved as
    PLOT_DIR / 'G1_effect_per_topic.png'.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    topics = TOPIC_ORDER
    topic_colors = [TOPIC_COLORS[t] for t in topics]
    means, cis, ds, ps, ns = [], [], [], [], []
    for topic in topics:
        sub = paired[paired['Topic'] == topic]
        g, _sd, sem, d, _tv, p, n = compute_effect(sub)
        # With small groups the normal quantile 1.96 understates the interval;
        # use the t critical value. Below two rows no interval exists.
        t_crit = stats.t.ppf(0.975, n - 1) if n >= 2 else np.nan
        means.append(g)
        cis.append(sem * t_crit)
        ds.append(d)
        ps.append(p)
        ns.append(n)

    # --- Left panel: raw mean gain with confidence intervals ---
    bars1 = ax1.bar(topics, means, color=topic_colors,
                    alpha=0.8, yerr=cis, capsize=6, edgecolor='white', lw=1.5)
    for b, g, p, n in zip(bars1, means, ps, ns):
        star = sig_stars(p)
        ax1.text(b.get_x() + b.get_width()/2,
                 g + (b.get_height() * 0.05 if g >= 0 else -2),
                 f'{g:+.1f}%\nn={n}\n{star}',
                 ha='center', va='bottom', fontsize=10, fontweight='bold')
    ax1.axhline(0, color='gray', lw=1)
    ax1.set_ylabel('Mean Score Gain (%, 95% CI)', fontsize=12)
    ax1.set_title('Mean Tutoring Gain per Topic', fontsize=12, fontweight='bold')
    ax1.set_xticks(range(len(topics)))
    ax1.set_xticklabels(topics, fontsize=11)

    # --- Right panel: standardized effect size ---
    bars2 = ax2.bar(topics, ds, color=topic_colors,
                    alpha=0.8, edgecolor='white', lw=1.5)
    for b, d_val, p in zip(bars2, ds, ps):
        star = sig_stars(p)
        ax2.text(b.get_x() + b.get_width()/2,
                 max(d_val, 0) + 0.03,
                 f"d={d_val:.2f}\n{star}",
                 ha='center', va='bottom', fontsize=10, fontweight='bold')
    for thresh, label, ls in [(0.2, 'small', ':'), (0.5, 'medium', '--'), (0.8, 'large', '-')]:
        ax2.axhline(thresh, color='gray', lw=1, ls=ls, alpha=0.5)
        ax2.text(2.55, thresh + 0.02, label, fontsize=8, color='gray')
    ax2.set_ylim(bottom=0)
    ax2.set_ylabel("Cohen's d", fontsize=12)
    ax2.set_title("Effect Size (Cohen's d) per Topic", fontsize=12, fontweight='bold')
    ax2.set_xticks(range(len(topics)))
    ax2.set_xticklabels(topics, fontsize=11)

    fig.suptitle("G1 Tutoring Effect per Topic", fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'G1_effect_per_topic.png')
    plt.close(fig)
def plot_G2_slopes_per_topic(paired):
    """Paired slope plots per topic (3 panels), with medium-colored lines.

    Each panel shows one pre-to-post line per paired row of that topic, a
    thick mean trajectory for every medium that has rows (legend entry shows
    its mean change), and, with at least two rows, an overall stats
    annotation. The figure is saved as PLOT_DIR / 'G2_slopes_per_topic.png'.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)

    for ax, topic in zip(axes, TOPIC_ORDER):
        sub = paired[paired['Topic'] == topic].sort_values('P_Num')

        # Individual trajectories, coloured by the medium of the session.
        for who, med, y_pre, y_post in zip(sub['Participant'], sub['Medium'],
                                           sub['Pre_Score'], sub['Post_Score']):
            ax.plot([0, 1], [y_pre, y_post],
                    color=MEDIUM_COLORS[med], alpha=0.5, lw=1.5,
                    marker='o', markersize=5)
            ax.annotate(who, (1.02, y_post),
                        fontsize=7, va='center', alpha=0.6)

        # Per-medium mean trajectories.
        for medium in MEDIUM_ORDER:
            msub = sub[sub['Medium'] == medium]
            if len(msub) == 0:
                continue
            pm = msub['Pre_Score'].mean()
            qm = msub['Post_Score'].mean()
            ax.plot([0, 1], [pm, qm],
                    color=MEDIUM_COLORS[medium], lw=3.5, marker='D', markersize=10,
                    zorder=10, markeredgecolor='white', markeredgewidth=2,
                    label=f'{medium} ({qm-pm:+.1f}%)')

        if len(sub) >= 2:
            g, sd, sem, d, t, p, n = compute_effect(sub)
            star = sig_stars(p)
            ax.text(0.5, 0.03,
                    f'Overall: {g:+.1f}% d={d:.2f}\nt={t:.2f} p={p:.3f} {star}',
                    transform=ax.transAxes, ha='center', fontsize=9,
                    bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))

        ax.set_xticks([0, 1])
        ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=11)
        ax.set_title(topic, fontsize=14, fontweight='bold', color=TOPIC_COLORS[topic])
        ax.set_ylim(-5, 110)
        ax.legend(fontsize=9, loc='upper left')

    axes[0].set_ylabel('Test Score (%)', fontsize=12)
    fig.suptitle('G2 Paired Slopes by Topic (Medium-Colored Lines)',
                 fontsize=14, fontweight='bold')
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(PLOT_DIR / 'G2_slopes_per_topic.png')
    plt.close(fig)
# =============================================================================
# H. ALL MEDIUM × TOPIC COMBINATIONS (3×3 GRID)
# =============================================================================
def plot_H1_medium_topic_grid(paired):
    """3×3 grid: rows = mediums, cols = topics. Each cell = slope plot with stats.

    Every cell shows the individual pre-to-post lines for one medium/topic
    combination. With two or more rows the cell also gets the mean trajectory
    and an effect annotation; with exactly one row that row is re-drawn as the
    thick line and marked 'n=1 (no stats)'. The figure is saved as
    PLOT_DIR / 'H1_medium_topic_grid.png'.
    """
    fig, axes = plt.subplots(3, 3, figsize=(18, 16), sharey=True)

    for row_idx, medium in enumerate(MEDIUM_ORDER):
        medium_color = MEDIUM_COLORS[medium]
        for col_idx, topic in enumerate(TOPIC_ORDER):
            ax = axes[row_idx][col_idx]
            cell_rows = (paired['Medium'] == medium) & (paired['Topic'] == topic)
            sub = paired[cell_rows].sort_values('P_Num')

            for who, y_pre, y_post in zip(sub['Participant'],
                                          sub['Pre_Score'], sub['Post_Score']):
                ax.plot([0, 1], [y_pre, y_post],
                        color=TOPIC_COLORS[topic], alpha=0.55, lw=1.5,
                        marker='o', markersize=5)
                ax.annotate(who, (1.02, y_post),
                            fontsize=7, va='center', alpha=0.6)

            count = len(sub)
            if count >= 2:
                pre_m = sub['Pre_Score'].mean()
                post_m = sub['Post_Score'].mean()
                ax.plot([0, 1], [pre_m, post_m],
                        color=medium_color, lw=4, marker='D', markersize=11,
                        zorder=10, markeredgecolor='white', markeredgewidth=2)
                g, sd, sem, d, t, p, n = compute_effect(sub)
                star = sig_stars(p)
                ax.text(0.5, 0.03,
                        f'n={n} {g:+.1f}%\nd={d:.2f} p={p:.3f} {star}',
                        transform=ax.transAxes, ha='center', fontsize=8.5,
                        bbox=dict(boxstyle='round,pad=0.3', facecolor='lightyellow', alpha=0.9))
            elif count == 1:
                # A single pair: show it as the "mean" line, without statistics.
                only = sub.iloc[0]
                ax.plot([0, 1], [only['Pre_Score'], only['Post_Score']],
                        color=medium_color, lw=3, marker='D', markersize=10,
                        zorder=10, markeredgecolor='white', markeredgewidth=2)
                ax.text(0.5, 0.03, 'n=1 (no stats)', transform=ax.transAxes,
                        ha='center', fontsize=8.5, color='gray')

            ax.set_xticks([0, 1])
            ax.set_xticklabels(['Pre', 'Post'], fontsize=9)
            ax.set_ylim(-5, 110)
            if col_idx == 0:
                ax.set_ylabel(f'{medium}\nScore (%)', fontsize=10, fontweight='bold',
                              color=medium_color)
            if row_idx == 0:
                ax.set_title(topic, fontsize=12, fontweight='bold',
                             color=TOPIC_COLORS[topic])

    fig.suptitle('H1 Tutoring Slopes: All Medium × Topic Combinations',
                 fontsize=15, fontweight='bold')
    fig.tight_layout(rect=[0, 0, 0.97, 0.97])
    fig.savefig(PLOT_DIR / 'H1_medium_topic_grid.png')
    plt.close(fig)
# =============================================================================
# I. OUTLIER INFLUENCE ANALYSIS
# =============================================================================
def _flag_outliers(paired):
    """Return a copy of ``paired`` with a boolean 'Outlier' column added.

    The input frame is not modified. Every row starts as ``False``; then, for
    each medium in ``MEDIUM_ORDER``, the 'Score_Gain' values of that medium
    are passed to ``iqr_outlier_mask`` and its result is written back to the
    same rows. Rows whose 'Medium' is not listed in ``MEDIUM_ORDER`` therefore
    stay ``False``.
    """
    flagged = paired.copy()
    flagged['Outlier'] = False
    for medium in MEDIUM_ORDER:
        in_medium = flagged['Medium'] == medium
        gains = flagged.loc[in_medium, 'Score_Gain']
        # `.values` strips the index so the mask is assigned positionally.
        # NOTE(review): presumably iqr_outlier_mask returns a boolean Series
        # aligned with its input -- confirm against its definition.
        flagged.loc[in_medium, 'Outlier'] = iqr_outlier_mask(gains).values
    return flagged
def plot_I1_outlier_scatter(paired):
    """Scatter of score gains per medium with outliers labeled.

    Draws one panel per entry of ``MEDIUM_ORDER`` (three panels, shared
    y-axis). Each panel shows the horizontally jittered 'Score_Gain' values of
    that medium, the lower/upper 1.5*IQR fences as dashed red lines and a
    zero reference line. Rows flagged by ``_flag_outliers`` are drawn larger,
    in red, and annotated with participant and (truncated) topic.

    The figure is written to ``PLOT_DIR / 'I1_outlier_scatter.png'`` and then
    closed; nothing is returned.

    Args:
        paired: DataFrame with at least the columns 'Medium', 'Score_Gain',
            'Participant' and 'Topic'.
    """
    paired_f = _flag_outliers(paired)
    fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)
    for col_idx, medium in enumerate(MEDIUM_ORDER):
        ax = axes[col_idx]
        sub = paired_f[paired_f['Medium'] == medium]

        # Fences recomputed here purely for display.
        # NOTE(review): presumably the same rule iqr_outlier_mask applies -- confirm.
        q1 = sub['Score_Gain'].quantile(0.25)
        q3 = sub['Score_Gain'].quantile(0.75)
        iqr = q3 - q1
        lo = q1 - 1.5 * iqr
        hi = q3 + 1.5 * iqr
        # The two bounds need an explicit separator; without one the legend
        # reads like a single malformed number (e.g. "-12.525.0").
        ax.axhline(hi, color='#E53935', lw=1.5, ls='--', alpha=0.7,
                   label=f'±1.5 IQR ({lo:.1f} to {hi:.1f})')
        ax.axhline(lo, color='#E53935', lw=1.5, ls='--', alpha=0.7)
        ax.axhline(0, color='gray', lw=1, alpha=0.5)

        # Re-seeded per panel so the jitter is reproducible between runs.
        rng = np.random.default_rng(42)
        for _, r in sub.iterrows():
            jit = rng.uniform(-0.12, 0.12)
            color = '#E53935' if r['Outlier'] else MEDIUM_COLORS[medium]
            ms = 10 if r['Outlier'] else 7
            ax.scatter(0.5 + jit, r['Score_Gain'], color=color, s=ms**2,
                       alpha=0.8, edgecolors='white', lw=0.5, zorder=5)
            if r['Outlier']:
                lbl = f"{r['Participant']}\n({r['Topic'][:6]})"
                # Push the label away from the panel centre, towards the
                # side the point was jittered to.
                ax.annotate(lbl, (0.5 + jit, r['Score_Gain']),
                            fontsize=7.5, ha='center',
                            xytext=(20 if jit > 0 else -20, 0),
                            textcoords='offset points',
                            arrowprops=dict(arrowstyle='->', color='#E53935', lw=0.8),
                            color='#E53935', fontweight='bold')

        n_out = sub['Outlier'].sum()
        ax.set_xlim(0, 1)
        ax.set_xticks([0.5])
        ax.set_xticklabels([medium], fontsize=12)
        ax.set_title(f'{medium}\n({n_out} outlier{"s" if n_out != 1 else ""})',
                     fontsize=12, fontweight='bold', color=MEDIUM_COLORS[medium])
        if col_idx == 0:
            ax.set_ylabel('Score Gain (%)', fontsize=12)
        ax.legend(fontsize=8, loc='upper right')

    fig.suptitle('I1 Score Gain Scatter with Outlier Flags (IQR Method)',
                 fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'I1_outlier_scatter.png')
    plt.close(fig)
def plot_I2_outlier_effect_comparison(paired):
    """Cohen's d per medium: all data vs. outliers removed.

    Left panel: grouped bars of Cohen's d for each medium in ``MEDIUM_ORDER``,
    once for all rows and once with the rows flagged by ``_flag_outliers``
    removed, annotated with d, mean gain and significance stars, plus
    reference lines at d = 0.2 / 0.5 / 0.8.
    Right panel: the difference d(outliers removed) - d(all) per medium,
    green when non-negative and red otherwise.

    A NaN effect size is plotted as 0. The figure is written to
    ``PLOT_DIR / 'I2_outlier_effect_comparison.png'`` and then closed;
    nothing is returned.

    Args:
        paired: DataFrame with at least a 'Medium' column plus whatever
            ``_flag_outliers`` and ``compute_effect`` require.
    """
    paired_f = _flag_outliers(paired)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    x = np.arange(len(MEDIUM_ORDER))
    w = 0.35

    all_ds, no_out_ds = [], []
    all_gs, no_out_gs = [], []
    all_ps, no_out_ps = [], []
    for m in MEDIUM_ORDER:
        sub_all = paired_f[paired_f['Medium'] == m]
        sub_noo = sub_all[~sub_all['Outlier']]
        # compute_effect yields a 7-tuple, unpacked elsewhere in this file as
        # (gain, sd, sem, d, t, p, n); only gain, d and p are plotted here.
        g1, _, _, d1, _, p1, _ = compute_effect(sub_all)
        g2, _, _, d2, _, p2, _ = compute_effect(sub_noo)
        # NaN bars cannot be drawn, so fall back to a zero-height bar.
        all_ds.append(d1 if not np.isnan(d1) else 0)
        no_out_ds.append(d2 if not np.isnan(d2) else 0)
        all_gs.append(g1)
        no_out_gs.append(g2)
        all_ps.append(p1)
        no_out_ps.append(p2)

    series = [
        ('All Data', all_ds, all_gs, all_ps, 0.80, ''),
        ('Outliers Removed', no_out_ds, no_out_gs, no_out_ps, 0.45, '//'),
    ]
    for j, (label, ds, gs, ps, alpha, hatch) in enumerate(series):
        bars = ax1.bar(x + j*w - w/2, ds, w, label=label,
                       color=[MEDIUM_COLORS[m] for m in MEDIUM_ORDER],
                       alpha=alpha, hatch=hatch, edgecolor='white', lw=1.2)
        for b, d_val, g, p in zip(bars, ds, gs, ps):
            star = sig_stars(p) if not np.isnan(p) else ''
            # Anchor the text at or above zero so labels of negative bars
            # stay inside the visible (bottom=0) range.
            ax1.text(b.get_x() + b.get_width()/2,
                     max(d_val, 0) + 0.03,
                     f'd={d_val:.2f}\n{g:+.1f}%\n{star}',
                     ha='center', va='bottom', fontsize=8.5, fontweight='bold')

    # Place the threshold captions just right of the last bar group
    # (2.65 for three media) instead of at a hard-coded x position.
    thresh_x = len(MEDIUM_ORDER) - w
    for thresh, lbl, ls in [(0.2, 'small', ':'), (0.5, 'medium', '--'), (0.8, 'large', '-')]:
        ax1.axhline(thresh, color='gray', lw=1, ls=ls, alpha=0.5)
        ax1.text(thresh_x, thresh + 0.02, lbl, fontsize=8, color='gray')
    ax1.set_xticks(x)
    ax1.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    ax1.set_ylim(bottom=0)
    ax1.set_ylabel("Cohen's d", fontsize=12)
    ax1.set_title("Cohen's d: All Data vs. Outliers Removed", fontsize=12, fontweight='bold')
    ax1.legend(fontsize=10)

    # Delta d = d(outliers removed) - d(all)
    delta_d = [no - al for al, no in zip(all_ds, no_out_ds)]
    colors_d = ['#43A047' if dd >= 0 else '#E53935' for dd in delta_d]
    bars2 = ax2.bar(MEDIUM_ORDER, delta_d, color=colors_d, alpha=0.8, edgecolor='white', lw=1.5)
    for b, dd in zip(bars2, delta_d):
        ax2.text(b.get_x() + b.get_width()/2,
                 dd + (0.01 if dd >= 0 else -0.03),
                 f'Δd={dd:+.3f}',
                 ha='center', va='bottom' if dd >= 0 else 'top',
                 fontsize=10, fontweight='bold')
    ax2.axhline(0, color='gray', lw=1)
    # The axis shows "removed minus all"; the label must state the operator.
    ax2.set_ylabel("Δ Cohen's d (Outliers Removed − All)", fontsize=12)
    ax2.set_title('Change in Effect Size After Removing Outliers', fontsize=12, fontweight='bold')

    fig.suptitle('I2 Outlier Influence on Effect Sizes', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'I2_outlier_effect_comparison.png')
    plt.close(fig)
def plot_I3_outlier_heatmap(paired):
    """Heatmap: which participant×topic pairs are outliers per medium.

    Draws one heatmap per entry of ``MEDIUM_ORDER`` (three panels). Rows are
    participants sorted by the integer that follows the first character of
    their ID, columns follow ``TOPIC_ORDER``, and each cell shows the first
    'Score_Gain' recorded for that participant/topic in that medium. Cells
    flagged by ``_flag_outliers`` get a red border.

    The figure is written to ``PLOT_DIR / 'I3_outlier_heatmap.png'`` and then
    closed; nothing is returned.

    Args:
        paired: DataFrame with at least the columns 'Medium', 'Participant',
            'Topic' and 'Score_Gain'.
    """
    def _participant_number(pid):
        # Sort 'P2' before 'P10' by comparing the numeric suffix.
        return int(pid[1:])

    paired_f = _flag_outliers(paired)
    fig, axes = plt.subplots(1, 3, figsize=(18, 8))
    for col_idx, medium in enumerate(MEDIUM_ORDER):
        ax = axes[col_idx]
        sub = paired_f[paired_f['Medium'] == medium]

        # Build pivot: rows = participants sorted, cols = topics
        pivot = sub.pivot_table(index='Participant', columns='Topic',
                                values='Score_Gain', aggfunc='first')
        pivot = pivot.reindex(columns=TOPIC_ORDER)
        pivot = pivot.reindex(sorted(pivot.index, key=_participant_number))

        outlier_pivot = sub.pivot_table(index='Participant', columns='Topic',
                                        values='Outlier', aggfunc='first')
        # Align with the score pivot so both share the same cell grid.
        outlier_pivot = outlier_pivot.reindex(index=pivot.index, columns=TOPIC_ORDER)
        # Participant/topic combinations that do not occur in this medium are
        # NaN after pivoting, and NaN is truthy in Python. Comparing against
        # True yields a strict boolean grid so empty cells are never outlined.
        is_outlier = outlier_pivot.eq(True)

        # Draw heatmap of score gain
        sns.heatmap(pivot.astype(float), annot=True, fmt='.1f',
                    cmap='RdYlGn', center=0, vmin=-40, vmax=60,
                    linewidths=0.8, ax=ax, cbar_kws={'label': 'Score Gain %'},
                    annot_kws={'size': 9})

        # Overlay red border for outliers; heatmap cell (row r, col c) spans
        # the unit square whose lower-left data coordinate is (c, r).
        for r_i, pid in enumerate(pivot.index):
            for c_i, topic in enumerate(TOPIC_ORDER):
                if is_outlier.at[pid, topic]:
                    ax.add_patch(mpatches.Rectangle(
                        (c_i, r_i), 1, 1,
                        fill=False, edgecolor='#E53935', lw=3, zorder=5))

        ax.set_title(f'{medium}', fontsize=13, fontweight='bold',
                     color=MEDIUM_COLORS[medium])
        ax.set_xlabel('Topic', fontsize=10)
        ax.set_ylabel('Participant' if col_idx == 0 else '', fontsize=10)

    fig.suptitle('I3 Outlier Heatmap: Score Gain by Participant × Topic\n'
                 '(Red border = IQR outlier within that medium)',
                 fontsize=13, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'I3_outlier_heatmap.png')
    plt.close(fig)
# =============================================================================
# STATS EXPORT
# =============================================================================
def export_stats(paired):
    """Write the effect-size summary tables for sections F-I as CSV files.

    Four files are written into ``STATS_DIR`` (all without the index column):

    * ``effects_by_medium_with_without_oekologie.csv`` -- per medium, once for
      all topics and once restricted to ``TOPICS_NO_OEK``.
    * ``effects_by_topic.csv`` -- per topic in ``TOPIC_ORDER``.
    * ``effects_by_medium_topic_grid.csv`` -- per medium × topic combination.
    * ``outlier_influence.csv`` -- per medium, effect with and without the
      rows flagged by ``_flag_outliers``, plus a listing of those rows.

    Statistics come from ``compute_effect``; values are rounded to 3 decimals
    (p-values to 4) and NaN is passed through unchanged.
    """
    def _rounded(value, digits=3):
        # NaN stays NaN; everything else is rounded for the CSV.
        return np.nan if np.isnan(value) else round(value, digits)

    def _effect_columns(frame):
        # Shared N / gain / SD / d / t / p columns of tables F, G and H.
        gain, sd, _sem, d, t, p, n = compute_effect(frame)
        return {
            'N': n,
            'Mean_Gain': _rounded(gain),
            'SD_Gain': _rounded(sd),
            'Cohens_d': _rounded(d),
            't_stat': _rounded(t),
            'p_value': _rounded(p, 4),
        }

    flagged = _flag_outliers(paired)

    # --- F: effects by medium with/without Ökologie ---
    medium_rows = []
    for medium in MEDIUM_ORDER:
        in_medium = paired[paired['Medium'] == medium]
        without_oek = in_medium[in_medium['Topic'].isin(TOPICS_NO_OEK)]
        for condition, frame in (('All Topics', in_medium),
                                 ('Excl_Oekologie', without_oek)):
            medium_rows.append({'Medium': medium, 'Condition': condition,
                                **_effect_columns(frame)})
    pd.DataFrame(medium_rows).to_csv(
        STATS_DIR / 'effects_by_medium_with_without_oekologie.csv', index=False)

    # --- G: effects by topic ---
    topic_rows = [
        {'Topic': topic, **_effect_columns(paired[paired['Topic'] == topic])}
        for topic in TOPIC_ORDER
    ]
    pd.DataFrame(topic_rows).to_csv(STATS_DIR / 'effects_by_topic.csv', index=False)

    # --- H: effects by medium × topic ---
    grid_rows = []
    for medium in MEDIUM_ORDER:
        for topic in TOPIC_ORDER:
            cell = paired[(paired['Medium'] == medium) & (paired['Topic'] == topic)]
            grid_rows.append({'Medium': medium, 'Topic': topic,
                              **_effect_columns(cell)})
    pd.DataFrame(grid_rows).to_csv(STATS_DIR / 'effects_by_medium_topic_grid.csv', index=False)

    # --- I: outlier influence ---
    influence_rows = []
    for medium in MEDIUM_ORDER:
        everything = flagged[flagged['Medium'] == medium]
        kept = everything[~everything['Outlier']]
        removed = everything[everything['Outlier']][['Participant', 'Topic', 'Score_Gain']]

        gain_all, _, _, d_all, _, p_all, n_all = compute_effect(everything)
        gain_kept, _, _, d_kept, _, p_kept, n_kept = compute_effect(kept)

        if np.isnan(d_all) or np.isnan(d_kept):
            delta = np.nan
        else:
            delta = d_kept - d_all

        # Human-readable listing, e.g. "P3/Topic(+42.0%); P7/Topic(-18.5%)".
        listing = '; '.join(
            f"{pid}/{topic}({gain:+.1f}%)"
            for pid, topic, gain in zip(removed['Participant'],
                                        removed['Topic'],
                                        removed['Score_Gain'])
        )

        influence_rows.append({
            'Medium': medium,
            'N_all': n_all,
            'Mean_Gain_all': _rounded(gain_all),
            'Cohens_d_all': _rounded(d_all),
            'p_all': _rounded(p_all, 4),
            'N_no_outliers': n_kept,
            'Mean_Gain_no_outliers': _rounded(gain_kept),
            'Cohens_d_no_outliers': _rounded(d_kept),
            'p_no_outliers': _rounded(p_kept, 4),
            'Delta_d': round(delta, 3),
            'Outliers': listing,
        })
    pd.DataFrame(influence_rows).to_csv(STATS_DIR / 'outlier_influence.csv', index=False)

    print(f" Stats exported to: {STATS_DIR}")
# =============================================================================
# MAIN
# =============================================================================
def main():
    """Load the data, render every plot of sections F-I, then export stats.

    The paired tutoring frame is built once via ``load_data`` and
    ``build_paired_tutoring`` and handed to each plotting function in turn.
    Progress is printed per section and per plot (after the plot has been
    written). The final plot count is derived from the plots actually run, so
    it cannot drift when entries are added to or removed from the table.
    """
    print("Loading data...")
    df = load_data()
    paired = build_paired_tutoring(df)
    print(f" {len(paired)} paired tutoring entries across "
          f"{paired['Participant'].nunique()} participants\n")

    # (section title, [(plot code, description, plotting function), ...])
    sections = [
        ("F. Effect Without Ökologie (vs. With)", [
            ("F1", "Cohen's d comparison by medium", plot_F1_cohens_d_comparison),
            ("F2", "Mean score gain comparison", plot_F2_mean_gain_comparison),
            ("F3", "Paired slopes 2×3 grid", plot_F3_paired_slopes_comparison),
            ("F4", "Gain distribution comparison", plot_F4_gain_distribution_comparison),
            ("F5", "Descriptive stats table", plot_F5_stats_table),
        ]),
        ("G. Effect Per Topic", [
            ("G1", "Bar chart: gain + d per topic", plot_G1_effect_per_topic),
            ("G2", "Paired slopes per topic", plot_G2_slopes_per_topic),
        ]),
        ("H. Medium × Topic Grid", [
            ("H1", "3×3 grid of slope plots", plot_H1_medium_topic_grid),
        ]),
        ("I. Outlier Influence Analysis", [
            ("I1", "Outlier scatter per medium", plot_I1_outlier_scatter),
            ("I2", "Effect size: all vs. outliers removed", plot_I2_outlier_effect_comparison),
            ("I3", "Outlier heatmap (participant × topic)", plot_I3_outlier_heatmap),
        ]),
    ]

    n_plots = 0
    for section_name, plots in sections:
        print(section_name)
        for code, desc, plot_fn in plots:
            plot_fn(paired)
            print(f" [{code}] {desc}")
            n_plots += 1

    print(f"\n{n_plots} plots saved to: {PLOT_DIR}")
    print("\nExporting statistics...")
    export_stats(paired)
    print("Done.")
# Run the full plotting/export pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()

View File

@ -29,7 +29,7 @@ from scipy import stats
# PATHS & CONSTANTS
# =============================================================================
BASE = Path(r"F:\GitHub Projekte\VirTu-Eval\Data")
BASE = Path(__file__).resolve().parent / "Data"
PLOT_DIR = BASE / "plots_questionnaires"
PLOT_DIR.mkdir(exist_ok=True)
STATS_DIR = BASE / "stats"