856 lines
37 KiB
Python
856 lines
37 KiB
Python
"""
|
||
generate_plots_effects.py
|
||
|
||
Effect-focused analysis for VirTu-Eval experiment data.
|
||
Generates plots into Data/plots_effects/ organized by section:
|
||
|
||
F. Effect Without Ökologie (vs. With) – 5 plots
|
||
G. Effect Per Topic – 2 plots
|
||
H. All Medium × Topic Combinations – 1 plot (3×3 grid)
|
||
I. Outlier Influence Analysis – 3 plots
|
||
|
||
Usage:
|
||
python generate_plots_effects.py
|
||
"""
|
||
|
||
import pandas as pd
|
||
import numpy as np
|
||
import matplotlib
|
||
matplotlib.use('Agg')
|
||
import matplotlib.pyplot as plt
|
||
import matplotlib.patches as mpatches
|
||
from matplotlib.lines import Line2D
|
||
import seaborn as sns
|
||
from pathlib import Path
|
||
from scipy import stats
|
||
|
||
# =============================================================================
|
||
# CONFIG
|
||
# =============================================================================
|
||
# Root directory for the input CSV and all generated output (relative to the
# current working directory).
BASE = Path("Data")

# Output directories are created at import time so the plotting and export
# functions can write without checking for them first.
PLOT_DIR = BASE / "plots_effects"
PLOT_DIR.mkdir(parents=True, exist_ok=True)
STATS_DIR = BASE / "stats"
# parents=True so this call does not depend on the PLOT_DIR call above having
# already created BASE.
STATS_DIR.mkdir(parents=True, exist_ok=True)

# Measurement time points in chronological order, plus short display labels.
PHASE_ORDER = ['Pre-Reading', 'Post-Reading', 'Pre-Tutoring', 'Post-Tutoring']
PHASE_LABELS = ['Pre-Read', 'Post-Read', 'Pre-Tutor', 'Post-Tutor']
PHASE_SHORT = dict(zip(PHASE_ORDER, PHASE_LABELS))

# Tutoring media and the colour used for each in every plot.
MEDIUM_ORDER = ['Chat', 'Video', 'VR']
MEDIUM_COLORS = {'Chat': '#2196F3', 'Video': '#FF9800', 'VR': '#4CAF50'}

# Topics and the colour used for each in every plot.
TOPIC_ORDER = ['Mendel', 'DNA-Replikation', 'Ökologie']
TOPIC_COLORS = {'Mendel': '#E91E63', 'DNA-Replikation': '#9C27B0', 'Ökologie': '#009688'}

# Topic subset used by the "Excl. Ökologie" comparisons.
TOPICS_NO_OEK = ['Mendel', 'DNA-Replikation']

# Global plot styling.
sns.set_theme(style="whitegrid", font_scale=1.05)
plt.rcParams['figure.dpi'] = 150
plt.rcParams['savefig.bbox'] = 'tight'
|
||
|
||
|
||
# =============================================================================
|
||
# HELPERS
|
||
# =============================================================================
|
||
|
||
def cohens_d(pre, post):
    """Return the paired effect size: mean of (post - pre) divided by its sample SD.

    Yields 0.0 when the sample SD of the differences is not strictly positive
    (all differences equal, or the SD is NaN because there is only one pair).
    """
    delta = post - pre
    spread = delta.std(ddof=1)
    if spread > 0:
        return delta.mean() / spread
    return 0.0
|
||
|
||
|
||
def sig_stars(p):
    """Translate a p-value into its significance marker.

    Returns '***' below 0.001, '**' below 0.01, '*' below 0.05 and 'n.s.'
    otherwise. A NaN p-value fails every comparison and so also gives 'n.s.'.
    """
    for cutoff, marker in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p < cutoff:
            return marker
    return 'n.s.'
|
||
|
||
|
||
def compute_effect(sub):
    """Summarise the pre-to-post change for a paired subset.

    Parameters
    ----------
    sub : DataFrame
        Must provide the columns 'Pre_Score', 'Post_Score' and 'Score_Gain'.

    Returns
    -------
    tuple
        (mean_gain, sd_gain, sem_gain, d, t, p, n). With fewer than two rows
        only the mean, SD and n are computed; sem, d, t and p are NaN.
    """
    n = len(sub)
    gain = sub['Score_Gain']
    if n < 2:
        return gain.mean(), gain.std(), np.nan, np.nan, np.nan, np.nan, n

    pre = sub['Pre_Score']
    post = sub['Post_Score']
    # Test post - pre (not pre - post) so the t statistic carries the same
    # sign as the mean gain and Cohen's d; the two-sided p-value is unaffected.
    t, p = stats.ttest_rel(post, pre)
    d = cohens_d(pre, post)
    return gain.mean(), gain.std(ddof=1), gain.sem(), d, t, p, n
|
||
|
||
|
||
def iqr_outlier_mask(series):
    """Return a boolean Series marking values beyond 1.5×IQR from the quartiles.

    A value is flagged when it lies below Q1 - 1.5*IQR or above Q3 + 1.5*IQR.
    """
    lower_q = series.quantile(0.25)
    upper_q = series.quantile(0.75)
    spread = upper_q - lower_q
    too_low = series < lower_q - 1.5 * spread
    too_high = series > upper_q + 1.5 * spread
    return too_low | too_high
|
||
|
||
|
||
# =============================================================================
|
||
# DATA LOADING
|
||
# =============================================================================
|
||
|
||
def load_data():
    """Read 'test_scores_all.csv' from BASE and add the derived columns.

    'Zeitpunkt' is stripped and the value 'Pre-Tutor' is renamed to
    'Pre-Tutoring'; 'Phase' is the same column as an ordered categorical over
    PHASE_ORDER; 'P_Num' is the first run of digits in 'Participant' as int.
    """
    scores = pd.read_csv(BASE / "test_scores_all.csv", encoding="utf-8-sig")

    timepoint = scores['Zeitpunkt'].str.strip()
    scores['Zeitpunkt'] = timepoint.replace('Pre-Tutor', 'Pre-Tutoring')
    scores['Phase'] = pd.Categorical(scores['Zeitpunkt'],
                                     categories=PHASE_ORDER, ordered=True)

    digits = scores['Participant'].str.extract(r'(\d+)')
    scores['P_Num'] = digits.astype(int)
    return scores
|
||
|
||
|
||
def build_paired_tutoring(df):
    """Pair each Pre-Tutoring row with its Post-Tutoring row.

    Rows are matched on (Participant, Topic, Medium) with an inner merge, so
    only combinations present at both time points survive. The result carries
    Pre_/Post_ score and confidence columns, their gains (post - pre) and the
    numeric participant id 'P_Num'.
    """
    keys = ['Participant', 'Topic', 'Medium']
    measures = ['Score_Pct', 'Avg_Confidence']

    def _phase_rows(timepoint, prefix):
        # Select one time point and prefix its measurement columns.
        rows = df[df['Zeitpunkt'] == timepoint][keys + measures].copy()
        rows.columns = keys + [f'{prefix}_Score', f'{prefix}_Conf']
        return rows

    before = _phase_rows('Pre-Tutoring', 'Pre')
    after = _phase_rows('Post-Tutoring', 'Post')

    paired = before.merge(after, on=keys)
    paired['Score_Gain'] = paired['Post_Score'] - paired['Pre_Score']
    paired['Conf_Gain'] = paired['Post_Conf'] - paired['Pre_Conf']
    paired['P_Num'] = paired['Participant'].str.extract(r'(\d+)').astype(int)
    return paired
|
||
|
||
|
||
# =============================================================================
|
||
# F. EFFECT WITHOUT ÖKOLOGIE (vs. WITH)
|
||
# =============================================================================
|
||
|
||
def plot_F1_cohens_d_comparison(paired):
    """Grouped bars of Cohen's d per medium: all topics next to the subset without Ökologie.

    Every bar is annotated with d, the mean gain and the significance marker.
    Horizontal guide lines mark d = 0.2 / 0.5 / 0.8. The figure is written to
    PLOT_DIR as 'F1_cohens_d_comparison.png'.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(MEDIUM_ORDER))
    w = 0.35
    bar_colors = [MEDIUM_COLORS[m] for m in MEDIUM_ORDER]

    variants = [
        ('All Topics', True, ''),
        ('Excl. Ökologie', False, '//'),
    ]
    for j, (label, use_all, hatch) in enumerate(variants):
        effect_sizes, gains, p_values = [], [], []
        for medium in MEDIUM_ORDER:
            subset = paired[paired['Medium'] == medium]
            if not use_all:
                subset = subset[subset['Topic'].isin(TOPICS_NO_OEK)]
            gain, _, _, d, _, p, _ = compute_effect(subset)
            # Undefined statistics are drawn as d = 0 and treated as p = 1.
            effect_sizes.append(0 if np.isnan(d) else d)
            gains.append(gain)
            p_values.append(1 if np.isnan(p) else p)

        bars = ax.bar(x + j*w - w/2, effect_sizes, w, label=label,
                      color=bar_colors,
                      alpha=0.85 if j == 0 else 0.45,
                      hatch=hatch, edgecolor='white', linewidth=1.2)

        for bar, gain, p, d_val in zip(bars, gains, p_values, effect_sizes):
            ax.text(bar.get_x() + bar.get_width()/2,
                    max(d_val, 0) + 0.04,
                    f'd={d_val:.2f}\n{gain:+.1f}%\n{sig_stars(p)}',
                    ha='center', va='bottom', fontsize=8.5, fontweight='bold',
                    color='#333333')

    # (threshold, line style, line alpha, label height, label text)
    guides = [
        (0.2, ':', 0.6, 0.21, 'small'),
        (0.5, '--', 0.6, 0.51, 'medium'),
        (0.8, '-', 0.4, 0.81, 'large'),
    ]
    for level, line_style, opacity, _, _ in guides:
        ax.axhline(level, color='gray', lw=1, ls=line_style, alpha=opacity)
    for _, _, _, text_y, name in guides:
        ax.text(2.65, text_y, name, fontsize=8, color='gray', va='bottom')

    ax.set_xticks(x)
    ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    ax.set_ylabel("Cohen's d (tutoring score gain)", fontsize=12)
    ax.set_ylim(bottom=0)
    ax.legend(fontsize=11)
    ax.set_title("F1 – Effect Sizes by Medium: All Topics vs. Excl. Ökologie",
                 fontsize=13, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'F1_cohens_d_comparison.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_F2_mean_gain_comparison(paired):
    """Grouped bars of mean score gain per medium with 95% confidence intervals.

    Compares all topics against the subset without Ökologie. The interval
    half-width is SEM times the two-sided 95% Student-t critical value for
    n - 1 degrees of freedom; groups with fewer than two pairs get no error
    bar (NaN). The figure is written to PLOT_DIR as
    'F2_mean_gain_comparison.png'.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(MEDIUM_ORDER))
    w = 0.35

    for j, (label, filter_fn, alpha, hatch) in enumerate([
        ('All Topics', lambda sub: sub, 0.80, ''),
        ('Excl. Ökologie', lambda sub: sub[sub['Topic'].isin(TOPICS_NO_OEK)], 0.45, '//'),
    ]):
        means, cis = [], []
        for m in MEDIUM_ORDER:
            sub_f = filter_fn(paired[paired['Medium'] == m])
            g, _sd, sem, _d, _t, _p, n = compute_effect(sub_f)
            means.append(g)
            # The normal-approximation factor 1.96 understates the interval
            # for small samples; use the t quantile instead.
            cis.append(sem * stats.t.ppf(0.975, n - 1) if n >= 2 else np.nan)

        bars = ax.bar(x + j*w - w/2, means, w, label=label,
                      color=[MEDIUM_COLORS[m] for m in MEDIUM_ORDER],
                      alpha=alpha, hatch=hatch, edgecolor='white', linewidth=1.2,
                      yerr=cis, capsize=5, error_kw=dict(lw=1.5, capthick=1.5))

        for b, g in zip(bars, means):
            ax.text(b.get_x() + b.get_width()/2,
                    g + (b.get_height() * 0.05 if g >= 0 else -2),
                    f'{g:+.1f}%',
                    ha='center', va='bottom', fontsize=9, fontweight='bold',
                    color='#333333')

    ax.axhline(0, color='gray', lw=1)
    ax.set_xticks(x)
    ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    ax.set_ylabel('Mean Score Gain (%, 95% CI)', fontsize=12)
    ax.legend(fontsize=11)
    ax.set_title('F2 – Mean Score Gain by Medium: All Topics vs. Excl. Ökologie',
                 fontsize=13, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'F2_mean_gain_comparison.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_F3_paired_slopes_comparison(paired):
    """Draw a 2×3 grid of pre/post slope plots, one column per medium.

    The top row uses every topic, the bottom row only the topics in
    TOPICS_NO_OEK. Thin lines are individual pairs coloured by topic; the
    thick line is the medium mean, drawn together with a statistics box when
    a panel has at least two pairs. The figure is written to PLOT_DIR as
    'F3_paired_slopes_comparison.png'.
    """
    conditions = [
        ('All Topics', paired),
        ('Excl. Ökologie', paired[paired['Topic'].isin(TOPICS_NO_OEK)]),
    ]

    fig, axes = plt.subplots(2, 3, figsize=(18, 12), sharey=True)

    for row, (cond_label, data) in enumerate(conditions):
        for col_idx, medium in enumerate(MEDIUM_ORDER):
            ax = axes[row][col_idx]
            sub = data[data['Medium'] == medium].sort_values('P_Num')

            # One thin line per participant/topic pair.
            for pid, topic, pre, post in zip(sub['Participant'], sub['Topic'],
                                             sub['Pre_Score'], sub['Post_Score']):
                ax.plot([0, 1], [pre, post],
                        color=TOPIC_COLORS[topic], alpha=0.55, lw=1.5,
                        marker='o', markersize=5)
                ax.annotate(pid, (1.02, post),
                            fontsize=7, va='center', alpha=0.6)

            if len(sub) >= 2:
                pre_m = sub['Pre_Score'].mean()
                post_m = sub['Post_Score'].mean()
                ax.plot([0, 1], [pre_m, post_m],
                        color=MEDIUM_COLORS[medium], lw=4, marker='D',
                        markersize=12, zorder=10,
                        markeredgecolor='white', markeredgewidth=2)

                g, sd, sem, d, t, p, n = compute_effect(sub)
                star = sig_stars(p)
                ax.text(0.5, 0.03,
                        f'n={n} Gain: {g:+.1f}%\nd={d:.2f} t={t:.2f} p={p:.3f} {star}',
                        transform=ax.transAxes, ha='center', fontsize=9,
                        bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))

            ax.set_xticks([0, 1])
            ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=10)
            ax.set_ylim(-5, 110)

            if col_idx == 0:
                ax.set_ylabel(f'{cond_label}\nTest Score (%)', fontsize=10, fontweight='bold')
            if row == 0:
                ax.set_title(medium, fontsize=13, fontweight='bold',
                             color=MEDIUM_COLORS[medium])

    legend_els = []
    for topic in TOPIC_ORDER:
        legend_els.append(Line2D([0], [0], color=TOPIC_COLORS[topic], lw=2,
                                 marker='o', ms=6, label=topic))
    legend_els.append(Line2D([0], [0], color='gray', lw=4, marker='D', ms=8,
                             label='Medium Mean'))
    fig.legend(handles=legend_els, loc='lower center', ncol=4, fontsize=10,
               bbox_to_anchor=(0.5, 0.01))
    fig.suptitle('F3 – Paired Slopes: All Topics (top) vs. Excl. Ökologie (bottom)',
                 fontsize=14, fontweight='bold')
    fig.tight_layout(rect=[0, 0.05, 1, 0.97])
    fig.savefig(PLOT_DIR / 'F3_paired_slopes_comparison.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_F4_gain_distribution_comparison(paired):
    """Draw violin plus box plots of score gain per medium, all topics vs. without Ökologie.

    One panel per medium with two distributions side by side. A distribution
    with fewer than two values is skipped. Each drawn distribution is labelled
    with n, mean gain, Cohen's d and the significance marker. The figure is
    written to PLOT_DIR as 'F4_gain_distribution_comparison.png'.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
    positions = [0.8, 2.2]
    alphas = [0.75, 0.40]

    for col_idx, medium in enumerate(MEDIUM_ORDER):
        ax = axes[col_idx]
        medium_color = MEDIUM_COLORS[medium]
        in_medium = paired['Medium'] == medium
        frame_all = paired[in_medium]
        frame_noe = paired[in_medium & paired['Topic'].isin(TOPICS_NO_OEK)]

        for pos, frame, alpha in zip(positions, [frame_all, frame_noe], alphas):
            data = frame['Score_Gain'].values
            if len(data) < 2:
                continue

            parts = ax.violinplot(data, positions=[pos], widths=0.9,
                                  showmedians=False, showextrema=False)
            for body in parts['bodies']:
                body.set_facecolor(medium_color)
                body.set_alpha(alpha)

            ax.boxplot(data, positions=[pos], widths=0.35,
                       patch_artist=True, showmeans=True, notch=False,
                       meanprops=dict(marker='D', markerfacecolor='black',
                                      markeredgecolor='white', markersize=7),
                       medianprops=dict(color='white', lw=2),
                       boxprops=dict(facecolor=medium_color, alpha=alpha))

            g, sd, sem, d, t, p, n = compute_effect(frame)
            star = sig_stars(p)
            ax.text(pos, np.nanmax(data) + 4,
                    f'n={n}\nM={g:+.1f}%\nd={d:.2f} {star}',
                    ha='center', va='bottom', fontsize=8.5, fontweight='bold')

        ax.axhline(0, color='gray', lw=1, ls='--', alpha=0.6)
        ax.set_xticks(positions)
        ax.set_xticklabels(['All\nTopics', 'Excl.\nÖkologie'], fontsize=10)
        ax.set_title(medium, fontsize=13, fontweight='bold', color=medium_color)
        if col_idx == 0:
            ax.set_ylabel('Score Gain (%)', fontsize=12)

    fig.suptitle('F4 – Gain Distributions: All Topics vs. Excl. Ökologie',
                 fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'F4_gain_distribution_comparison.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_F5_stats_table(paired):
    """Render a table of N, mean gain, SD, d, t and p for each medium × condition.

    The conditions are 'All Topics' and 'Excl. Ökologie'. Undefined d, t and p
    values are shown as '–'. Rows are tinted by medium. The figure is written
    to PLOT_DIR as 'F5_stats_table.png'.
    """
    fig, ax = plt.subplots(figsize=(14, 5))
    ax.axis('off')

    def _cell(value, spec, suffix=''):
        # Format a statistic, or show a dash when it is undefined.
        if np.isnan(value):
            return '–'
        return f'{value:{spec}}{suffix}'

    rows = []
    for medium in MEDIUM_ORDER:
        in_medium = paired['Medium'] == medium
        subsets = [
            ('All Topics', paired[in_medium]),
            ('Excl. Ökologie', paired[in_medium & paired['Topic'].isin(TOPICS_NO_OEK)]),
        ]
        for cond_label, sub in subsets:
            g, sd, sem, d, t, p, n = compute_effect(sub)
            star = '' if np.isnan(p) else sig_stars(p)
            rows.append([
                medium, cond_label, str(n),
                f'{g:+.2f}', f'{sd:.2f}',
                _cell(d, '.3f'),
                _cell(t, '.3f'),
                _cell(p, '.3f', star),
            ])

    col_labels = ['Medium', 'Condition', 'N', 'Mean Gain (%)', 'SD',
                  "Cohen's d", 't-stat', 'p-value']
    table = ax.table(cellText=rows, colLabels=col_labels,
                     loc='center', cellLoc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1.0, 2.0)

    n_cols = len(col_labels)

    # Header row: dark background, bold white text.
    for j in range(n_cols):
        header = table[0, j]
        header.set_facecolor('#37474F')
        header.set_text_props(color='white', fontweight='bold')

    # Body rows: tint by medium, fade the "Excl. Ökologie" rows.
    row_tint = {'Chat': '#BBDEFB', 'Video': '#FFE0B2', 'VR': '#C8E6C9'}
    row_alpha = {'All Topics': 0.85, 'Excl. Ökologie': 0.60}
    for i, row in enumerate(rows, start=1):
        medium, cond = row[0], row[1]
        for j in range(n_cols):
            cell = table[i, j]
            cell.set_facecolor(row_tint[medium])
            cell.set_alpha(row_alpha[cond])

    ax.set_title('F5 – Statistical Summary: All Topics vs. Excl. Ökologie',
                 fontsize=13, fontweight='bold', pad=30)
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'F5_stats_table.png')
    plt.close(fig)
|
||
|
||
|
||
# =============================================================================
|
||
# G. EFFECT PER TOPIC
|
||
# =============================================================================
|
||
|
||
def plot_G1_effect_per_topic(paired):
    """Two-panel bar chart per topic: mean score gain with 95% CI, and Cohen's d.

    The interval half-width is SEM times the two-sided 95% Student-t critical
    value for n - 1 degrees of freedom; topics with fewer than two pairs get
    no error bar (NaN). An undefined Cohen's d is drawn as 0, so its label
    still lands at a finite position. The figure is written to PLOT_DIR as
    'G1_effect_per_topic.png'.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

    topics = TOPIC_ORDER
    topic_colors = [TOPIC_COLORS[t] for t in topics]
    means, cis, ds, ps, ns = [], [], [], [], []
    for topic in topics:
        sub = paired[paired['Topic'] == topic]
        g, _sd, sem, d, _tv, p, n = compute_effect(sub)
        means.append(g)
        # The normal-approximation factor 1.96 understates the interval for
        # small samples; use the t quantile instead.
        cis.append(sem * stats.t.ppf(0.975, n - 1) if n >= 2 else np.nan)
        # Same NaN handling as the per-medium effect-size plots.
        ds.append(0 if np.isnan(d) else d)
        ps.append(p)
        ns.append(n)

    # Left panel: mean gain with confidence interval.
    bars1 = ax1.bar(topics, means, color=topic_colors,
                    alpha=0.8, yerr=cis, capsize=6, edgecolor='white', lw=1.5)
    for b, g, p, n in zip(bars1, means, ps, ns):
        star = sig_stars(p)
        ax1.text(b.get_x() + b.get_width()/2,
                 g + (b.get_height() * 0.05 if g >= 0 else -2),
                 f'{g:+.1f}%\nn={n}\n{star}',
                 ha='center', va='bottom', fontsize=10, fontweight='bold')
    ax1.axhline(0, color='gray', lw=1)
    ax1.set_ylabel('Mean Score Gain (%, 95% CI)', fontsize=12)
    ax1.set_title('Mean Tutoring Gain per Topic', fontsize=12, fontweight='bold')
    ax1.set_xticks(range(len(topics)))
    ax1.set_xticklabels(topics, fontsize=11)

    # Right panel: effect size with the 0.2 / 0.5 / 0.8 guide lines.
    bars2 = ax2.bar(topics, ds, color=topic_colors,
                    alpha=0.8, edgecolor='white', lw=1.5)
    for b, d_val, p in zip(bars2, ds, ps):
        star = sig_stars(p)
        ax2.text(b.get_x() + b.get_width()/2,
                 max(d_val, 0) + 0.03,
                 f"d={d_val:.2f}\n{star}",
                 ha='center', va='bottom', fontsize=10, fontweight='bold')
    for thresh, label, ls in [(0.2, 'small', ':'), (0.5, 'medium', '--'), (0.8, 'large', '-')]:
        ax2.axhline(thresh, color='gray', lw=1, ls=ls, alpha=0.5)
        ax2.text(2.55, thresh + 0.02, label, fontsize=8, color='gray')
    ax2.set_ylim(bottom=0)
    ax2.set_ylabel("Cohen's d", fontsize=12)
    ax2.set_title("Effect Size (Cohen's d) per Topic", fontsize=12, fontweight='bold')
    ax2.set_xticks(range(len(topics)))
    ax2.set_xticklabels(topics, fontsize=11)

    fig.suptitle("G1 – Tutoring Effect per Topic", fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'G1_effect_per_topic.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_G2_slopes_per_topic(paired):
    """Draw one pre/post slope panel per topic, with lines coloured by medium.

    Thin lines are individual pairs. A thick line per medium shows that
    medium's mean and is listed in the legend with its mean gain. A statistics
    box over all media is added when the topic has at least two pairs. The
    figure is written to PLOT_DIR as 'G2_slopes_per_topic.png'.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)

    for ax, topic in zip(axes, TOPIC_ORDER):
        sub = paired[paired['Topic'] == topic].sort_values('P_Num')

        # Individual pairs.
        for pid, medium, pre, post in zip(sub['Participant'], sub['Medium'],
                                          sub['Pre_Score'], sub['Post_Score']):
            ax.plot([0, 1], [pre, post],
                    color=MEDIUM_COLORS[medium], alpha=0.5, lw=1.5,
                    marker='o', markersize=5)
            ax.annotate(pid, (1.02, post),
                        fontsize=7, va='center', alpha=0.6)

        # Per-medium mean lines.
        for medium in MEDIUM_ORDER:
            msub = sub[sub['Medium'] == medium]
            if len(msub) == 0:
                continue
            pre_mean = msub['Pre_Score'].mean()
            post_mean = msub['Post_Score'].mean()
            ax.plot([0, 1], [pre_mean, post_mean],
                    color=MEDIUM_COLORS[medium], lw=3.5, marker='D', markersize=10,
                    zorder=10, markeredgecolor='white', markeredgewidth=2,
                    label=f'{medium} ({post_mean-pre_mean:+.1f}%)')

        if len(sub) >= 2:
            g, sd, sem, d, t, p, n = compute_effect(sub)
            star = sig_stars(p)
            ax.text(0.5, 0.03,
                    f'Overall: {g:+.1f}% d={d:.2f}\nt={t:.2f} p={p:.3f} {star}',
                    transform=ax.transAxes, ha='center', fontsize=9,
                    bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))

        ax.set_xticks([0, 1])
        ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=11)
        ax.set_title(topic, fontsize=14, fontweight='bold', color=TOPIC_COLORS[topic])
        ax.set_ylim(-5, 110)
        ax.legend(fontsize=9, loc='upper left')

    axes[0].set_ylabel('Test Score (%)', fontsize=12)
    fig.suptitle('G2 – Paired Slopes by Topic (Medium-Colored Lines)',
                 fontsize=14, fontweight='bold')
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(PLOT_DIR / 'G2_slopes_per_topic.png')
    plt.close(fig)
|
||
|
||
|
||
# =============================================================================
|
||
# H. ALL MEDIUM × TOPIC COMBINATIONS (3×3 GRID)
|
||
# =============================================================================
|
||
|
||
def plot_H1_medium_topic_grid(paired):
    """3×3 grid: rows = mediums, cols = topics. Each cell = slope plot with stats.

    Cells with at least two pairs show the mean line and a statistics box;
    cells with exactly one pair show that pair as the bold line and the note
    'n=1 (no stats)'; empty cells only get axes. The figure is written to
    PLOT_DIR as 'H1_medium_topic_grid.png'.
    """
    fig, axes = plt.subplots(3, 3, figsize=(18, 16), sharey=True)

    for row_idx, medium in enumerate(MEDIUM_ORDER):
        for col_idx, topic in enumerate(TOPIC_ORDER):
            ax = axes[row_idx][col_idx]
            sub = paired[(paired['Medium'] == medium) &
                         (paired['Topic'] == topic)].sort_values('P_Num')

            # One thin line per pair, labelled with the participant id.
            for _, r in sub.iterrows():
                ax.plot([0, 1], [r['Pre_Score'], r['Post_Score']],
                        color=TOPIC_COLORS[topic], alpha=0.55, lw=1.5,
                        marker='o', markersize=5)
                ax.annotate(r['Participant'], (1.02, r['Post_Score']),
                            fontsize=7, va='center', alpha=0.6)

            if len(sub) >= 2:
                # Enough pairs for a mean line and paired statistics.
                pre_m, post_m = sub['Pre_Score'].mean(), sub['Post_Score'].mean()
                ax.plot([0, 1], [pre_m, post_m],
                        color=MEDIUM_COLORS[medium], lw=4, marker='D', markersize=11,
                        zorder=10, markeredgecolor='white', markeredgewidth=2)

                g, sd, sem, d, t, p, n = compute_effect(sub)
                star = sig_stars(p)
                ax.text(0.5, 0.03,
                        f'n={n} {g:+.1f}%\nd={d:.2f} p={p:.3f} {star}',
                        transform=ax.transAxes, ha='center', fontsize=8.5,
                        bbox=dict(boxstyle='round,pad=0.3', facecolor='lightyellow', alpha=0.9))
            elif len(sub) == 1:
                # A single pair: draw it in the medium colour, no statistics.
                r = sub.iloc[0]
                pre_m, post_m = r['Pre_Score'], r['Post_Score']
                ax.plot([0, 1], [pre_m, post_m],
                        color=MEDIUM_COLORS[medium], lw=3, marker='D', markersize=10,
                        zorder=10, markeredgecolor='white', markeredgewidth=2)
                ax.text(0.5, 0.03, 'n=1 (no stats)', transform=ax.transAxes,
                        ha='center', fontsize=8.5, color='gray')

            ax.set_xticks([0, 1])
            ax.set_xticklabels(['Pre', 'Post'], fontsize=9)
            ax.set_ylim(-5, 110)

            # Row labels on the first column, column titles on the first row.
            if col_idx == 0:
                ax.set_ylabel(f'{medium}\nScore (%)', fontsize=10, fontweight='bold',
                              color=MEDIUM_COLORS[medium])
            if row_idx == 0:
                ax.set_title(topic, fontsize=12, fontweight='bold',
                             color=TOPIC_COLORS[topic])

    fig.suptitle('H1 – Tutoring Slopes: All Medium × Topic Combinations',
                 fontsize=15, fontweight='bold')
    fig.tight_layout(rect=[0, 0, 0.97, 0.97])
    fig.savefig(PLOT_DIR / 'H1_medium_topic_grid.png')
    plt.close(fig)
|
||
|
||
|
||
# =============================================================================
|
||
# I. OUTLIER INFLUENCE ANALYSIS
|
||
# =============================================================================
|
||
|
||
def _flag_outliers(paired):
    """Return a copy of *paired* with a boolean 'Outlier' column.

    The 1.5×IQR rule is applied to 'Score_Gain' separately for each medium in
    MEDIUM_ORDER. Rows of any other medium stay False. The input is not
    modified.
    """
    flagged = paired.copy()
    flagged['Outlier'] = False
    for medium in MEDIUM_ORDER:
        rows = flagged['Medium'] == medium
        gains = flagged.loc[rows, 'Score_Gain']
        flagged.loc[rows, 'Outlier'] = iqr_outlier_mask(gains).values
    return flagged
|
||
|
||
|
||
def plot_I1_outlier_scatter(paired):
    """Scatter of score gains per medium with outliers labeled.

    One panel per medium. Dashed red lines mark the 1.5×IQR bounds; points
    flagged as outliers are drawn larger, in red, and annotated with the
    participant and the first six characters of the topic. The figure is
    written to PLOT_DIR as 'I1_outlier_scatter.png'.
    """
    paired_f = _flag_outliers(paired)
    fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)

    for col_idx, medium in enumerate(MEDIUM_ORDER):
        ax = axes[col_idx]
        sub = paired_f[paired_f['Medium'] == medium]

        # Recompute the bounds used by the IQR rule so they can be drawn.
        q1 = sub['Score_Gain'].quantile(0.25)
        q3 = sub['Score_Gain'].quantile(0.75)
        iqr = q3 - q1
        lo = q1 - 1.5 * iqr
        hi = q3 + 1.5 * iqr

        ax.axhline(hi, color='#E53935', lw=1.5, ls='--', alpha=0.7, label=f'±1.5 IQR ({lo:.1f}–{hi:.1f})')
        ax.axhline(lo, color='#E53935', lw=1.5, ls='--', alpha=0.7)
        ax.axhline(0, color='gray', lw=1, alpha=0.5)

        # Fixed seed, re-created per panel: the horizontal jitter is
        # reproducible and each panel starts from the same random sequence.
        rng = np.random.default_rng(42)
        for _, r in sub.iterrows():
            jit = rng.uniform(-0.12, 0.12)
            color = '#E53935' if r['Outlier'] else MEDIUM_COLORS[medium]
            ms = 10 if r['Outlier'] else 7
            ax.scatter(0.5 + jit, r['Score_Gain'], color=color, s=ms**2,
                       alpha=0.8, edgecolors='white', lw=0.5, zorder=5)
            if r['Outlier']:
                lbl = f"{r['Participant']}\n({r['Topic'][:6]})"
                # Push the label away from the centre line, towards the side
                # the point was jittered to.
                ax.annotate(lbl, (0.5 + jit, r['Score_Gain']),
                            fontsize=7.5, ha='center',
                            xytext=(20 if jit > 0 else -20, 0),
                            textcoords='offset points',
                            arrowprops=dict(arrowstyle='->', color='#E53935', lw=0.8),
                            color='#E53935', fontweight='bold')

        n_out = sub['Outlier'].sum()
        ax.set_xlim(0, 1)
        ax.set_xticks([0.5])
        ax.set_xticklabels([medium], fontsize=12)
        ax.set_title(f'{medium}\n({n_out} outlier{"s" if n_out != 1 else ""})',
                     fontsize=12, fontweight='bold', color=MEDIUM_COLORS[medium])
        if col_idx == 0:
            ax.set_ylabel('Score Gain (%)', fontsize=12)
        ax.legend(fontsize=8, loc='upper right')

    fig.suptitle('I1 – Score Gain Scatter with Outlier Flags (IQR Method)',
                 fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'I1_outlier_scatter.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_I2_outlier_effect_comparison(paired):
    """Cohen's d per medium: all data vs. outliers removed.

    Left panel: grouped bars of d for both conditions, annotated with d, mean
    gain and significance marker. Right panel: change in d after removing the
    outliers (green when d did not decrease, red otherwise). The figure is
    written to PLOT_DIR as 'I2_outlier_effect_comparison.png'.
    """
    paired_f = _flag_outliers(paired)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

    x = np.arange(len(MEDIUM_ORDER))
    w = 0.35
    all_ds, no_out_ds = [], []
    all_gs, no_out_gs = [], []
    all_ps, no_out_ps = [], []
    all_ns, no_out_ns = [], []

    for m in MEDIUM_ORDER:
        sub_all = paired_f[paired_f['Medium'] == m]
        sub_noo = sub_all[~sub_all['Outlier']]
        g1, _, _, d1, t1, p1, n1 = compute_effect(sub_all)
        g2, _, _, d2, t2, p2, n2 = compute_effect(sub_noo)
        # An undefined d is stored as 0, so it is drawn as an empty bar and
        # also enters the delta below as 0.
        all_ds.append(d1 if not np.isnan(d1) else 0)
        no_out_ds.append(d2 if not np.isnan(d2) else 0)
        all_gs.append(g1); no_out_gs.append(g2)
        all_ps.append(p1); no_out_ps.append(p2)
        all_ns.append(n1); no_out_ns.append(n2)

    for j, (label, ds, gs, ps, ns, alpha, hatch) in enumerate([
        ('All Data', all_ds, all_gs, all_ps, all_ns, 0.80, ''),
        ('Outliers Removed', no_out_ds, no_out_gs, no_out_ps, no_out_ns, 0.45, '//'),
    ]):
        bars = ax1.bar(x + j*w - w/2, ds, w, label=label,
                       color=[MEDIUM_COLORS[m] for m in MEDIUM_ORDER],
                       alpha=alpha, hatch=hatch, edgecolor='white', lw=1.2)
        for b, d_val, g, p, n in zip(bars, ds, gs, ps, ns):
            # No significance marker when p is undefined.
            star = sig_stars(p) if not np.isnan(p) else ''
            ax1.text(b.get_x() + b.get_width()/2,
                     max(d_val, 0) + 0.03,
                     f'd={d_val:.2f}\n{g:+.1f}%\n{star}',
                     ha='center', va='bottom', fontsize=8.5, fontweight='bold')

    # Guide lines at d = 0.2 / 0.5 / 0.8.
    for thresh, lbl, ls in [(0.2,'small',':'), (0.5,'medium','--'), (0.8,'large','-')]:
        ax1.axhline(thresh, color='gray', lw=1, ls=ls, alpha=0.5)
        ax1.text(2.65, thresh + 0.02, lbl, fontsize=8, color='gray')
    ax1.set_xticks(x); ax1.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    ax1.set_ylim(bottom=0)
    ax1.set_ylabel("Cohen's d", fontsize=12)
    ax1.set_title("Cohen's d: All Data vs. Outliers Removed", fontsize=12, fontweight='bold')
    ax1.legend(fontsize=10)

    # Delta d (outliers removed minus all data)
    delta_d = [no - al for al, no in zip(all_ds, no_out_ds)]
    colors_d = ['#43A047' if dd >= 0 else '#E53935' for dd in delta_d]
    bars2 = ax2.bar(MEDIUM_ORDER, delta_d, color=colors_d, alpha=0.8, edgecolor='white', lw=1.5)
    for b, dd in zip(bars2, delta_d):
        # Label above positive bars and below negative ones.
        ax2.text(b.get_x() + b.get_width()/2,
                 dd + (0.01 if dd >= 0 else -0.03),
                 f'Δd={dd:+.3f}',
                 ha='center', va='bottom' if dd >= 0 else 'top',
                 fontsize=10, fontweight='bold')
    ax2.axhline(0, color='gray', lw=1)
    ax2.set_ylabel('Δ Cohen\'s d (Outliers Removed − All)', fontsize=12)
    ax2.set_title('Change in Effect Size After Removing Outliers', fontsize=12, fontweight='bold')

    fig.suptitle('I2 – Outlier Influence on Effect Sizes', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'I2_outlier_effect_comparison.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_I3_outlier_heatmap(paired):
    """Heatmap of score gain by participant × topic, one panel per medium.

    Cells that the per-medium IQR rule flags as outliers get a red border.
    Participant/topic combinations without data in a medium are never flagged.
    A medium without any rows gets an empty, labelled panel. The figure is
    written to PLOT_DIR as 'I3_outlier_heatmap.png'.
    """
    paired_f = _flag_outliers(paired)

    fig, axes = plt.subplots(1, 3, figsize=(18, 8))

    def _participant_number(pid):
        # Sort key: the participant id without its first character, as int.
        return int(pid[1:])

    for col_idx, medium in enumerate(MEDIUM_ORDER):
        ax = axes[col_idx]
        sub = paired_f[paired_f['Medium'] == medium]

        ax.set_title(f'{medium}', fontsize=13, fontweight='bold',
                     color=MEDIUM_COLORS[medium])

        if sub.empty:
            # Nothing to pivot or draw for this medium.
            ax.set_xticks([])
            ax.set_yticks([])
            ax.text(0.5, 0.5, 'no data', transform=ax.transAxes,
                    ha='center', va='center', color='gray')
            continue

        # Build pivot: rows = participants sorted, cols = topics
        pivot = sub.pivot_table(index='Participant', columns='Topic',
                                values='Score_Gain', aggfunc='first')
        pivot = pivot.reindex(columns=TOPIC_ORDER)
        pivot = pivot.reindex(sorted(pivot.index, key=_participant_number))

        outlier_pivot = sub.pivot_table(index='Participant', columns='Topic',
                                        values='Outlier', aggfunc='first')
        # Cells without data are NaN after pivoting and reindexing, and NaN is
        # truthy. Comparing against True keeps only real outlier flags.
        is_outlier = outlier_pivot.reindex(index=pivot.index,
                                           columns=TOPIC_ORDER).eq(True)

        # Draw heatmap of score gain
        sns.heatmap(pivot.astype(float), annot=True, fmt='.1f',
                    cmap='RdYlGn', center=0, vmin=-40, vmax=60,
                    linewidths=0.8, ax=ax, cbar_kws={'label': 'Score Gain %'},
                    annot_kws={'size': 9})

        # Overlay red border for outliers
        for r_i, pid in enumerate(pivot.index):
            for c_i, topic in enumerate(TOPIC_ORDER):
                if is_outlier.loc[pid, topic]:
                    ax.add_patch(mpatches.Rectangle(
                        (c_i, r_i), 1, 1,
                        fill=False, edgecolor='#E53935', lw=3, zorder=5))

        ax.set_xlabel('Topic', fontsize=10)
        ax.set_ylabel('Participant' if col_idx == 0 else '', fontsize=10)

    fig.suptitle('I3 – Outlier Heatmap: Score Gain by Participant × Topic\n'
                 '(Red border = IQR outlier within that medium)',
                 fontsize=13, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'I3_outlier_heatmap.png')
    plt.close(fig)
|
||
|
||
|
||
# =============================================================================
|
||
# STATS EXPORT
|
||
# =============================================================================
|
||
|
||
def export_stats(paired):
    """Write the statistics behind sections F–I as CSV files into STATS_DIR.

    Files written: 'effects_by_medium_with_without_oekologie.csv',
    'effects_by_topic.csv', 'effects_by_medium_topic_grid.csv' and
    'outlier_influence.csv'. Prints a confirmation line when done.
    """
    def _rounded(value, digits):
        # NaN passes through unchanged; everything else is rounded.
        return np.nan if np.isnan(value) else round(value, digits)

    def _effect_columns(sub):
        # Statistic columns shared by the F, G and H tables, in output order.
        g, sd, sem, d, t, p, n = compute_effect(sub)
        return {
            'N': n,
            'Mean_Gain': _rounded(g, 3),
            'SD_Gain': _rounded(sd, 3),
            'Cohens_d': _rounded(d, 3),
            't_stat': _rounded(t, 3),
            'p_value': _rounded(p, 4),
        }

    paired_f = _flag_outliers(paired)

    # --- F: effects by medium with/without Ökologie ---
    rows_f = []
    for medium in MEDIUM_ORDER:
        in_medium = paired[paired['Medium'] == medium]
        variants = [
            ('All Topics', in_medium),
            ('Excl_Oekologie', in_medium[in_medium['Topic'].isin(TOPICS_NO_OEK)]),
        ]
        for cond_label, sub in variants:
            rows_f.append({'Medium': medium, 'Condition': cond_label,
                           **_effect_columns(sub)})
    pd.DataFrame(rows_f).to_csv(
        STATS_DIR / 'effects_by_medium_with_without_oekologie.csv', index=False)

    # --- G: effects by topic ---
    rows_g = [
        {'Topic': topic, **_effect_columns(paired[paired['Topic'] == topic])}
        for topic in TOPIC_ORDER
    ]
    pd.DataFrame(rows_g).to_csv(STATS_DIR / 'effects_by_topic.csv', index=False)

    # --- H: effects by medium × topic ---
    rows_h = []
    for medium in MEDIUM_ORDER:
        for topic in TOPIC_ORDER:
            cell = paired[(paired['Medium'] == medium) & (paired['Topic'] == topic)]
            rows_h.append({'Medium': medium, 'Topic': topic,
                           **_effect_columns(cell)})
    pd.DataFrame(rows_h).to_csv(STATS_DIR / 'effects_by_medium_topic_grid.csv', index=False)

    # --- I: outlier influence ---
    rows_i = []
    for medium in MEDIUM_ORDER:
        sub_all = paired_f[paired_f['Medium'] == medium]
        sub_noo = sub_all[~sub_all['Outlier']]
        g1, sd1, _, d1, t1, p1, n1 = compute_effect(sub_all)
        g2, sd2, _, d2, t2, p2, n2 = compute_effect(sub_noo)

        outliers = sub_all[sub_all['Outlier']][['Participant', 'Topic', 'Score_Gain']]
        described = [
            f"{r['Participant']}/{r['Topic']}({r['Score_Gain']:+.1f}%)"
            for _, r in outliers.iterrows()
        ]

        rows_i.append({
            'Medium': medium,
            'N_all': n1,
            'Mean_Gain_all': _rounded(g1, 3),
            'Cohens_d_all': _rounded(d1, 3),
            'p_all': _rounded(p1, 4),
            'N_no_outliers': n2,
            'Mean_Gain_no_outliers': _rounded(g2, 3),
            'Cohens_d_no_outliers': _rounded(d2, 3),
            'p_no_outliers': _rounded(p2, 4),
            # NaN in either d propagates through the subtraction.
            'Delta_d': _rounded(d2 - d1, 3),
            'Outliers': '; '.join(described),
        })
    pd.DataFrame(rows_i).to_csv(STATS_DIR / 'outlier_influence.csv', index=False)

    print(f" Stats exported to: {STATS_DIR}")
|
||
|
||
|
||
# =============================================================================
|
||
# MAIN
|
||
# =============================================================================
|
||
|
||
def main():
    """Load the scores, render every plot section in order, then export the statistics."""
    print("Loading data...")
    paired = build_paired_tutoring(load_data())
    n_participants = paired['Participant'].nunique()
    print(f" {len(paired)} paired tutoring entries across "
          f"{n_participants} participants\n")

    # (section heading, [(plot code, description, plot function), ...])
    sections = [
        ("F. Effect Without Ökologie (vs. With)", [
            ("F1", "Cohen's d comparison by medium", plot_F1_cohens_d_comparison),
            ("F2", "Mean score gain comparison", plot_F2_mean_gain_comparison),
            ("F3", "Paired slopes 2×3 grid", plot_F3_paired_slopes_comparison),
            ("F4", "Gain distribution comparison", plot_F4_gain_distribution_comparison),
            ("F5", "Descriptive stats table", plot_F5_stats_table),
        ]),
        ("G. Effect Per Topic", [
            ("G1", "Bar chart: gain + d per topic", plot_G1_effect_per_topic),
            ("G2", "Paired slopes per topic", plot_G2_slopes_per_topic),
        ]),
        ("H. Medium × Topic Grid", [
            ("H1", "3×3 grid of slope plots", plot_H1_medium_topic_grid),
        ]),
        ("I. Outlier Influence Analysis", [
            ("I1", "Outlier scatter per medium", plot_I1_outlier_scatter),
            ("I2", "Effect size: all vs. outliers removed", plot_I2_outlier_effect_comparison),
            ("I3", "Outlier heatmap (participant × topic)", plot_I3_outlier_heatmap),
        ]),
    ]

    for section_name, plots in sections:
        print(section_name)
        for code, desc, plot_fn in plots:
            plot_fn(paired)
            # Reported only after the plot function returned.
            print(f" [{code}] {desc}")

    print(f"\n11 plots saved to: {PLOT_DIR}")

    print("\nExporting statistics...")
    export_stats(paired)
    print("Done.")
|
||
|
||
|
||
# Run the full pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|