Files
Virtual-Tutor-Eval/generate_plots.py

778 lines
45 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
generate_plots.py
Consolidated visualization script for VirTu-Eval experiment data.
Generates all plots into Data/plots/ organized by section:
A. Overall Learning Trajectory (4 plots)
B. Tutoring Phase Deep-Dive (5 plots)
C. Start-to-Finish Gains (2 plots)
D. Confidence Analysis (3 plots)
E. Personality Correlations (2 plots)
Usage:
python generate_plots.py
"""
import csv
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import seaborn as sns
from pathlib import Path
from io import StringIO
from scipy import stats
# =============================================================================
# CONFIG
# =============================================================================
# Input data lives in Data/ next to this script; outputs go to sub-folders of it.
BASE = Path(__file__).resolve().parent / "Data"
PLOT_DIR = BASE / "plots"
STATS_DIR = BASE / "stats"
# Created at import time. No parents=True, so Data/ itself must already exist.
PLOT_DIR.mkdir(exist_ok=True)
STATS_DIR.mkdir(exist_ok=True)

# Measurement phases in chronological order, with short axis labels.
PHASE_ORDER = ['Pre-Reading', 'Post-Reading', 'Pre-Tutoring', 'Post-Tutoring']
PHASE_LABELS = ['Pre-Read', 'Post-Read', 'Pre-Tutor', 'Post-Tutor']
PHASE_SHORT = {phase: label for phase, label in zip(PHASE_ORDER, PHASE_LABELS)}

# Tutoring media and their plot colours.
MEDIUM_ORDER = ['Chat', 'Video', 'VR']
MEDIUM_COLORS = {
    'Chat': '#2196F3',
    'Video': '#FF9800',
    'VR': '#4CAF50',
}

# Topics and their plot colours.
TOPIC_ORDER = ['Mendel', 'DNA-Replikation', 'Ökologie']
TOPIC_COLORS = {
    'Mendel': '#E91E63',
    'DNA-Replikation': '#9C27B0',
    'Ökologie': '#009688',
}

# Questionnaire item numbers per trait; items listed under 'reverse' are
# scored as 8 - value when the trait mean is computed in load_personality().
BFI_TRAITS = {
    'Neuroticism': {'items': [1, 2, 3], 'reverse': [3]},
    'Extraversion': {'items': [4, 5, 6], 'reverse': [6]},
    'Openness': {'items': [7, 8, 9], 'reverse': []},
    'Agreeableness': {'items': [10, 11, 12], 'reverse': [10]},
    'Conscientiousness': {'items': [13, 14, 15], 'reverse': [14]},
}
TRAIT_ORDER = list(BFI_TRAITS)
TRAIT_COLORS = {
    'Neuroticism': '#E53935',
    'Extraversion': '#FB8C00',
    'Openness': '#43A047',
    'Agreeableness': '#1E88E5',
    'Conscientiousness': '#8E24AA',
}

# Global plot styling.
sns.set_theme(style="whitegrid", font_scale=1.05)
plt.rcParams.update({'figure.dpi': 150, 'savefig.bbox': 'tight'})
def cohens_d(pre, post):
    """Return the paired effect size: mean of (post - pre) over its standard deviation.

    Args:
        pre: Baseline values (a pandas Series at every call site in this file).
        post: Follow-up values, subtracted element-wise from ``pre``.

    Returns:
        ``mean(post - pre) / std(post - pre)``, or ``0`` when the standard
        deviation is not strictly positive (zero, or NaN for fewer than two pairs).
    """
    change = post - pre
    # "not (> 0)" rather than "<= 0" so that a NaN spread also falls back to 0.
    if not (change.std() > 0):
        return 0
    return change.mean() / change.std()
# =============================================================================
# DATA LOADING
# =============================================================================
def load_data():
    """Read ``test_scores_all.csv`` from ``BASE`` and add helper columns.

    Returns:
        The score table with a cleaned ``Zeitpunkt`` column plus:
        ``Phase`` (ordered categorical over ``PHASE_ORDER``),
        ``Phase_Label`` (ordered categorical over ``PHASE_LABELS``),
        ``Phase_Idx`` (position of the phase in ``PHASE_ORDER``) and
        ``P_Num`` (first run of digits in ``Participant`` as int).
    """
    scores = pd.read_csv(BASE / "test_scores_all.csv", encoding="utf-8-sig")

    # Strip whitespace and expand the abbreviated "Pre-Tutor" to "Pre-Tutoring".
    zeitpunkt = scores['Zeitpunkt'].str.strip().replace('Pre-Tutor', 'Pre-Tutoring')
    scores['Zeitpunkt'] = zeitpunkt

    scores['Phase'] = pd.Categorical(zeitpunkt, categories=PHASE_ORDER, ordered=True)
    scores['Phase_Label'] = pd.Categorical(
        zeitpunkt.map(PHASE_SHORT), categories=PHASE_LABELS, ordered=True)

    phase_position = {phase: position for position, phase in enumerate(PHASE_ORDER)}
    scores['Phase_Idx'] = zeitpunkt.map(phase_position)

    scores['P_Num'] = scores['Participant'].str.extract(r'(\d+)').astype(int)
    return scores
def build_paired_tutoring(df):
    """Pair each Pre-Tutoring row with its Post-Tutoring row and compute gains.

    Args:
        df: Score table with ``Zeitpunkt``, ``Participant``, ``Topic``,
            ``Medium``, ``Score_Pct`` and ``Avg_Confidence`` columns.

    Returns:
        One row per (Participant, Topic, Medium) present in both phases, with
        ``Pre_Score``, ``Pre_Conf``, ``Post_Score``, ``Post_Conf``,
        ``Score_Gain``, ``Conf_Gain`` and the numeric participant id ``P_Num``.
    """
    keys = ['Participant', 'Topic', 'Medium']
    measures = ['Score_Pct', 'Avg_Confidence']

    def phase_rows(zeitpunkt, prefix):
        # Rows of one phase, with the measures renamed to <prefix>_Score / <prefix>_Conf.
        rows = df.loc[df['Zeitpunkt'] == zeitpunkt, keys + measures].copy()
        return rows.rename(columns={'Score_Pct': f'{prefix}_Score',
                                    'Avg_Confidence': f'{prefix}_Conf'})

    # Inner join: only combinations measured in both phases survive.
    paired = phase_rows('Pre-Tutoring', 'Pre').merge(
        phase_rows('Post-Tutoring', 'Post'), on=keys)
    paired['Score_Gain'] = paired['Post_Score'] - paired['Pre_Score']
    paired['Conf_Gain'] = paired['Post_Conf'] - paired['Pre_Conf']
    paired['P_Num'] = paired['Participant'].str.extract(r'(\d+)').astype(int)
    return paired
def load_personality():
    """Load the final questionnaire and compute one score per Big Five trait.

    Reads ``Final-Questionnaire.csv`` from ``BASE``. The first row is treated
    as a header and skipped. For every other row the participant id is taken
    from the last column (prefixed with ``P`` if it does not already start
    with it) and items 1-15 are read from columns 1-15. Items that are missing
    or not integers count as NaN. Each trait is the mean of its available
    items, with reverse-keyed items scored as ``8 - value``.

    Blank rows and rows without a participant id are skipped; an empty file
    yields an empty frame instead of raising.

    Returns:
        DataFrame with a ``Participant`` column followed by one column per
        trait in ``BFI_TRAITS`` (NaN when none of the trait's items is usable).
    """
    path = BASE / "Final-Questionnaire.csv"
    # newline="" is what the csv module expects so it can handle line endings itself.
    with open(path, encoding="utf-8-sig", newline="") as f:
        rows = list(csv.reader(f))

    records = []
    for row in rows[1:]:  # rows[0] is the header; slicing is safe on an empty file
        if not row:
            # csv.reader yields [] for blank lines; row[-1] would raise on those.
            continue
        pid = row[-1].strip()
        if not pid:
            continue
        if not pid.startswith('P'):
            pid = f'P{pid}'

        items = {}
        for number in range(1, 16):
            try:
                items[number] = int(row[number].strip())
            except (ValueError, IndexError):
                items[number] = np.nan

        record = {'Participant': pid}
        for trait, info in BFI_TRAITS.items():
            answers = [
                8 - items[number] if number in info['reverse'] else items[number]
                for number in info['items']
                if pd.notna(items.get(number, np.nan))
            ]
            record[trait] = np.mean(answers) if answers else np.nan
        records.append(record)

    # Explicit columns keep the expected schema even when no row qualified.
    return pd.DataFrame(records, columns=['Participant', *BFI_TRAITS])
# =============================================================================
# A. OVERALL LEARNING TRAJECTORY
# =============================================================================
def plot_A1_trajectory(df):
    """Plot mean score and mean confidence per phase on twin y-axes (plot A1).

    Error bars are 1.96 x SEM. Every point is annotated with its value, and
    brackets below the x-axis mark the reading and tutoring phases. The figure
    is saved as ``A1_trajectory.png`` in ``PLOT_DIR``.

    Args:
        df: Score table with ``Phase_Label``, ``Score_Pct`` and
            ``Avg_Confidence`` columns.
    """
    score_color, conf_color = '#1976D2', '#E65100'
    fig, score_ax = plt.subplots(figsize=(10, 6))

    per_phase = df.groupby('Phase_Label', observed=True).agg(
        S=('Score_Pct', 'mean'), S_se=('Score_Pct', 'sem'),
        C=('Avg_Confidence', 'mean'), C_se=('Avg_Confidence', 'sem'),
    ).reindex(PHASE_LABELS)
    positions = np.arange(4)

    # Left axis: test score.
    score_ax.errorbar(positions, per_phase['S'], yerr=per_phase['S_se'] * 1.96,
                      color=score_color, marker='o', markersize=10, linewidth=2.5,
                      capsize=5, capthick=2, label='Score %', zorder=5)
    score_ax.set_ylabel('Test Score (%)', color=score_color, fontsize=13)
    score_ax.set_ylim(30, 100)
    score_ax.tick_params(axis='y', labelcolor=score_color)

    # Right axis: confidence.
    conf_ax = score_ax.twinx()
    conf_ax.errorbar(positions, per_phase['C'], yerr=per_phase['C_se'] * 1.96,
                     color=conf_color, marker='s', markersize=10, linewidth=2.5,
                     capsize=5, capthick=2, linestyle='--', label='Confidence', zorder=5)
    conf_ax.set_ylabel('Avg Confidence (1-7)', color=conf_color, fontsize=13)
    conf_ax.set_ylim(1, 7)
    conf_ax.tick_params(axis='y', labelcolor=conf_color)

    score_ax.set_xticks(positions)
    score_ax.set_xticklabels(PHASE_LABELS, fontsize=12)

    # per_phase is indexed by PHASE_LABELS, so the row position is the x position.
    for pos, (_, row) in enumerate(per_phase.iterrows()):
        score_ax.annotate(f'{row["S"]:.1f}%', (pos, row['S']), textcoords="offset points",
                          xytext=(0, 14), ha='center', fontsize=10, color=score_color,
                          fontweight='bold')
        conf_ax.annotate(f'{row["C"]:.2f}', (pos, row['C']), textcoords="offset points",
                         xytext=(0, -18), ha='center', fontsize=10, color=conf_color,
                         fontweight='bold')

    # One legend for both axes.
    score_handles, score_labels = score_ax.get_legend_handles_labels()
    conf_handles, conf_labels = conf_ax.get_legend_handles_labels()
    score_ax.legend(score_handles + conf_handles, score_labels + conf_labels,
                    loc='lower right', fontsize=11)

    # Brackets below the axis (axes-fraction coordinates) marking the two phases.
    for start, end in ((0, 0.32), (0.68, 1)):
        score_ax.annotate('', xy=(end, -0.12), xytext=(start, -0.12),
                          arrowprops=dict(arrowstyle='<->', color='gray', lw=1.5),
                          annotation_clip=False, xycoords='axes fraction')
    for x_frac, caption in ((0.16, 'Reading Phase'), (0.84, 'Tutoring Phase')):
        score_ax.text(x_frac, -0.17, caption, transform=score_ax.transAxes,
                      ha='center', fontsize=10, color='gray')

    fig.suptitle('Overall Learning Trajectory', fontsize=15, fontweight='bold')
    fig.savefig(PLOT_DIR / 'A1_trajectory.png', bbox_inches='tight')
    plt.close(fig)
def plot_A2_trajectory_by_medium(df):
    """Plot per-phase score and confidence trajectories, one line per medium (A2).

    Left panel shows mean ``Score_Pct``, right panel mean ``Avg_Confidence``;
    error bars are 1.96 x SEM. Saved as ``A2_trajectory_by_medium.png``.

    Args:
        df: Score table with ``Medium``, ``Phase_Label``, ``Score_Pct`` and
            ``Avg_Confidence`` columns.
    """
    fig, (score_ax, conf_ax) = plt.subplots(1, 2, figsize=(16, 6))
    positions = np.arange(4)
    # (axis, value column, panel-specific line styling)
    panels = [
        (score_ax, 'Score_Pct', dict(marker='o')),
        (conf_ax, 'Avg_Confidence', dict(marker='s', linestyle='--')),
    ]

    # Small horizontal shifts keep the error bars of the three media apart.
    for shift, medium in zip([-0.1, 0, 0.1], MEDIUM_ORDER):
        rows = df[df['Medium'] == medium]
        for ax, column, look in panels:
            by_phase = (rows.groupby('Phase_Label', observed=True)[column]
                        .agg(['mean', 'sem']).reindex(PHASE_LABELS))
            ax.errorbar(positions + shift, by_phase['mean'], yerr=by_phase['sem'] * 1.96,
                        color=MEDIUM_COLORS[medium], markersize=8, linewidth=2,
                        capsize=4, label=medium, **look)

    for ax, ylabel, ylim, title in (
            (score_ax, 'Test Score (%)', (30, 100), 'Score'),
            (conf_ax, 'Avg Confidence (1-7)', (1, 7), 'Confidence')):
        ax.set_xticks(positions)
        ax.set_xticklabels(PHASE_LABELS)
        ax.set_ylabel(ylabel)
        ax.set_ylim(*ylim)
        ax.legend(title='Medium')
        ax.set_title(title)

    fig.suptitle('Learning Trajectories by Medium', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'A2_trajectory_by_medium.png')
    plt.close(fig)
def plot_A3_trajectory_by_topic(df):
    """Plot per-phase score and confidence trajectories, one line per topic (A3).

    Left panel shows mean ``Score_Pct``, right panel mean ``Avg_Confidence``;
    error bars are 1.96 x SEM. Saved as ``A3_trajectory_by_topic.png``.

    Args:
        df: Score table with ``Topic``, ``Phase_Label``, ``Score_Pct`` and
            ``Avg_Confidence`` columns.
    """
    fig, (score_ax, conf_ax) = plt.subplots(1, 2, figsize=(16, 6))
    positions = np.arange(4)
    # (axis, value column, panel-specific line styling)
    panels = [
        (score_ax, 'Score_Pct', dict(marker='o')),
        (conf_ax, 'Avg_Confidence', dict(marker='s', linestyle='--')),
    ]

    # Small horizontal shifts keep the error bars of the three topics apart.
    for shift, topic in zip([-0.1, 0, 0.1], TOPIC_ORDER):
        rows = df[df['Topic'] == topic]
        for ax, column, look in panels:
            by_phase = (rows.groupby('Phase_Label', observed=True)[column]
                        .agg(['mean', 'sem']).reindex(PHASE_LABELS))
            ax.errorbar(positions + shift, by_phase['mean'], yerr=by_phase['sem'] * 1.96,
                        color=TOPIC_COLORS[topic], markersize=8, linewidth=2,
                        capsize=4, label=topic, **look)

    for ax, ylabel, ylim, title in (
            (score_ax, 'Test Score (%)', (30, 100), 'Score'),
            (conf_ax, 'Avg Confidence (1-7)', (1, 7), 'Confidence')):
        ax.set_xticks(positions)
        ax.set_xticklabels(PHASE_LABELS)
        ax.set_ylabel(ylabel)
        ax.set_ylim(*ylim)
        ax.legend(title='Topic')
        ax.set_title(title)

    fig.suptitle('Learning Trajectories by Topic', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'A3_trajectory_by_topic.png')
    plt.close(fig)
def plot_A4_heatmap(df):
    """Draw participant x phase heatmaps of score and confidence (A4).

    Rows are participants sorted by the integer after the first character of
    their id; columns are the four phases. Saved as ``A4_heatmap.png``.

    Args:
        df: Score table with ``Participant``, ``Zeitpunkt``, ``Score_Pct`` and
            ``Avg_Confidence`` columns.
    """
    def participant_by_phase(value_column):
        # Mean of value_column per participant and phase, in display order.
        table = df.pivot_table(index='Participant', columns='Zeitpunkt',
                               values=value_column, aggfunc='mean')
        ordered_ids = sorted(table.index, key=lambda pid: int(pid[1:]))
        table = table.reindex(columns=PHASE_ORDER).reindex(ordered_ids)
        table.columns = PHASE_LABELS
        return table

    score_table = participant_by_phase('Score_Pct')
    conf_table = participant_by_phase('Avg_Confidence')

    fig, (score_ax, conf_ax) = plt.subplots(1, 2, figsize=(14, 8))
    sns.heatmap(score_table, annot=True, fmt='.0f', cmap='RdYlGn', vmin=20, vmax=100,
                ax=score_ax, linewidths=.5, cbar_kws={'label': 'Score %'})
    score_ax.set_title('Test Scores')
    score_ax.set_ylabel('Participant')
    sns.heatmap(conf_table, annot=True, fmt='.1f', cmap='YlOrRd', vmin=1, vmax=7,
                ax=conf_ax, linewidths=.5, cbar_kws={'label': 'Confidence (1-7)'})
    conf_ax.set_title('Confidence')
    conf_ax.set_ylabel('')

    fig.suptitle('Participant-Level Heatmaps', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'A4_heatmap.png')
    plt.close(fig)
# =============================================================================
# B. TUTORING PHASE DEEP-DIVE
# =============================================================================
def plot_B1_tutoring_slopes_by_medium(paired):
    """Draw pre/post tutoring slope charts, one panel per medium (B1).

    Each participant is a thin line coloured by topic; the thick line is the
    medium mean. A box at the bottom of each panel reports the mean gain,
    Cohen's d, and the paired t-test. Saved as
    ``B1_tutoring_slopes_by_medium.png``.

    Args:
        paired: Output of ``build_paired_tutoring`` (needs ``Medium``,
            ``Topic``, ``Participant``, ``P_Num``, ``Pre_Score``, ``Post_Score``).
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
    for ax, medium in zip(axes, MEDIUM_ORDER):
        sub = paired[paired['Medium'] == medium].sort_values('P_Num')

        # Individual trajectories, labelled with the participant id on the right.
        for _, row in sub.iterrows():
            ax.plot([0, 1], [row['Pre_Score'], row['Post_Score']],
                    color=TOPIC_COLORS[row['Topic']],
                    alpha=0.5, linewidth=1.5, marker='o', markersize=5)
            ax.annotate(row['Participant'], (1.02, row['Post_Score']),
                        fontsize=7, va='center', alpha=0.6)

        # Medium mean on top.
        pre_m, post_m = sub['Pre_Score'].mean(), sub['Post_Score'].mean()
        ax.plot([0, 1], [pre_m, post_m], color=MEDIUM_COLORS[medium], linewidth=4,
                marker='D', markersize=12, zorder=10,
                markeredgecolor='white', markeredgewidth=2)

        # Post first, so the sign of t matches the gain and Cohen's d shown with it
        # (t = d * sqrt(n)); the two-sided p-value does not depend on the order.
        t, p = stats.ttest_rel(sub['Post_Score'], sub['Pre_Score'])
        d = cohens_d(sub['Pre_Score'], sub['Post_Score'])
        sig = '***' if p < .001 else '**' if p < .01 else '*' if p < .05 else 'n.s.'
        ax.text(0.5, 0.02,
                f'Gain: {post_m-pre_m:+.1f}% d={d:.2f}\nt={t:.2f}, p={p:.3f} {sig}',
                transform=ax.transAxes, ha='center', fontsize=10,
                bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))

        ax.set_xticks([0, 1])
        ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=11)
        ax.set_title(medium, fontsize=14, fontweight='bold', color=MEDIUM_COLORS[medium])
        ax.set_ylim(-5, 110)

    axes[0].set_ylabel('Test Score (%)', fontsize=12)

    # Shared legend below the panels: one entry per topic plus the mean marker.
    legend_el = [Line2D([0], [0], color=TOPIC_COLORS[topic], lw=2, marker='o', ms=6, label=topic)
                 for topic in TOPIC_ORDER]
    legend_el.append(Line2D([0], [0], color='gray', lw=4, marker='D', ms=8, label='Medium Mean'))
    fig.legend(handles=legend_el, loc='upper center', ncol=4, fontsize=10,
               bbox_to_anchor=(0.5, 0.02))

    fig.suptitle('Tutoring: Individual Trajectories by Medium', fontsize=15, fontweight='bold')
    fig.tight_layout(rect=[0, 0.05, 1, 0.96])
    fig.savefig(PLOT_DIR / 'B1_tutoring_slopes_by_medium.png')
    plt.close(fig)
def plot_B2_tutoring_slopes_by_topic(paired):
    """Draw pre/post tutoring slope charts, one panel per topic (B2).

    Thin lines are participants coloured by medium; thick lines are the
    per-medium means within the topic, with the mean gain in the legend.
    A box reports the overall gain, Cohen's d and the paired t-test p-value.
    Saved as ``B2_tutoring_slopes_by_topic.png``.

    Args:
        paired: Output of ``build_paired_tutoring``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)

    for ax, topic in zip(axes, TOPIC_ORDER):
        topic_rows = paired[paired['Topic'] == topic].sort_values('P_Num')

        # Individual trajectories.
        for _, rec in topic_rows.iterrows():
            ax.plot([0, 1], [rec['Pre_Score'], rec['Post_Score']],
                    color=MEDIUM_COLORS[rec['Medium']],
                    alpha=0.5, linewidth=1.5, marker='o', markersize=5)
            ax.annotate(rec['Participant'], (1.02, rec['Post_Score']),
                        fontsize=7, va='center', alpha=0.6)

        # Mean trajectory of every medium that occurs within this topic.
        for medium in MEDIUM_ORDER:
            medium_rows = topic_rows[topic_rows['Medium'] == medium]
            if len(medium_rows) == 0:
                continue
            mean_pre = medium_rows['Pre_Score'].mean()
            mean_post = medium_rows['Post_Score'].mean()
            ax.plot([0, 1], [mean_pre, mean_post], color=MEDIUM_COLORS[medium],
                    linewidth=3.5, marker='D', markersize=10, zorder=10,
                    markeredgecolor='white', markeredgewidth=2,
                    label=f'{medium} ({mean_post-mean_pre:+.1f}%)')

        # Topic-level statistics box.
        t, p = stats.ttest_rel(topic_rows['Pre_Score'], topic_rows['Post_Score'])
        d = cohens_d(topic_rows['Pre_Score'], topic_rows['Post_Score'])
        if p < .001:
            sig = '***'
        elif p < .01:
            sig = '**'
        elif p < .05:
            sig = '*'
        else:
            sig = 'n.s.'
        ax.text(0.5, 0.02,
                f'Overall: {topic_rows["Score_Gain"].mean():+.1f}% d={d:.2f}\np={p:.3f} {sig}',
                transform=ax.transAxes, ha='center', fontsize=10,
                bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))

        ax.set_xticks([0, 1])
        ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=11)
        ax.set_title(topic, fontsize=14, fontweight='bold', color=TOPIC_COLORS[topic])
        ax.set_ylim(-5, 110)
        ax.legend(fontsize=9, loc='upper left')

    axes[0].set_ylabel('Test Score (%)', fontsize=12)
    fig.suptitle('Tutoring: Individual Trajectories by Topic', fontsize=15, fontweight='bold')
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(PLOT_DIR / 'B2_tutoring_slopes_by_topic.png')
    plt.close(fig)
def plot_B3_tutoring_gain_by_medium(paired):
    """Plot mean tutoring gains per medium for score and confidence (B3).

    Bars show the mean gain with 1.96 x SEM error bars, overlaid with jittered
    individual gains and labelled with the mean and Cohen's d. The jitter uses
    a generator seeded with 42, and the same offsets are used in both panels.
    Saved as ``B3_tutoring_gain_by_medium.png``.

    Args:
        paired: Output of ``build_paired_tutoring``.
    """
    fig, (score_ax, conf_ax) = plt.subplots(1, 2, figsize=(14, 6.5))
    rng = np.random.default_rng(42)

    def draw(ax, pos, rows, x_points, gain_col, pre_col, post_col, pad, label):
        # One bar + scatter + text label for a single medium on one panel.
        mean_gain, sem_gain = rows[gain_col].mean(), rows[gain_col].sem()
        effect = cohens_d(rows[pre_col], rows[post_col])
        color = MEDIUM_COLORS[MEDIUM_ORDER[pos]]
        ax.bar(pos, mean_gain, color=color, alpha=0.6, width=0.6, yerr=sem_gain*1.96,
               capsize=6, edgecolor='white', lw=1.5)
        ax.scatter(x_points, rows[gain_col], color=color, s=40, alpha=0.7,
                   edgecolors='white', lw=0.5, zorder=5)
        ax.text(pos, mean_gain+sem_gain*1.96+pad, label(mean_gain, effect),
                ha='center', fontsize=10, fontweight='bold')

    for pos, medium in enumerate(MEDIUM_ORDER):
        rows = paired[paired['Medium'] == medium]
        jitter = rng.uniform(-0.15, 0.15, len(rows))
        x_points = np.full(len(rows), pos) + jitter
        draw(score_ax, pos, rows, x_points, 'Score_Gain', 'Pre_Score', 'Post_Score', 2,
             lambda gain, effect: f'{gain:+.1f}%\nd={effect:.2f}')
        draw(conf_ax, pos, rows, x_points, 'Conf_Gain', 'Pre_Conf', 'Post_Conf', 0.15,
             lambda gain, effect: f'{gain:+.2f}\nd={effect:.2f}')

    for ax, ylabel, title in ((score_ax, 'Score Gain (%)', 'Score Gain'),
                              (conf_ax, 'Confidence Gain', 'Confidence Gain')):
        ax.axhline(0, color='gray', lw=1)
        ax.set_xticks(range(3))
        ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
        ax.set_ylabel(ylabel)
        ax.set_title(title)

    fig.suptitle('Tutoring Gains by Medium (with effect sizes)', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'B3_tutoring_gain_by_medium.png')
    plt.close(fig)
def plot_B4_tutoring_medium_topic(paired):
    """Show the medium x topic interaction in tutoring score gain (B4).

    Left: heatmap of the mean ``Score_Gain`` per medium and topic.
    Right: grouped bars of the same means with 1.96 x SEM error bars.
    Saved as ``B4_tutoring_medium_topic.png``.

    Args:
        paired: Output of ``build_paired_tutoring``.
    """
    fig, (heat_ax, bar_ax) = plt.subplots(1, 2, figsize=(15, 6))

    gain_table = paired.pivot_table(index='Medium', columns='Topic',
                                    values='Score_Gain', aggfunc='mean')
    gain_table = gain_table.reindex(index=MEDIUM_ORDER, columns=TOPIC_ORDER)
    sns.heatmap(gain_table, annot=True, fmt='.1f', cmap='RdYlGn', center=0, ax=heat_ax,
                linewidths=1, vmin=-10, vmax=30, cbar_kws={'label': 'Score Gain %'})
    heat_ax.set_title('Mean Tutoring Score Gain')
    heat_ax.set_ylabel('Medium')

    positions = np.arange(3)
    bar_width = 0.25
    for slot, topic in enumerate(TOPIC_ORDER):
        # Gains of this topic, split by medium in display order.
        cells = [paired[(paired['Medium'] == medium) & (paired['Topic'] == topic)]['Score_Gain']
                 for medium in MEDIUM_ORDER]
        heights = [cell.mean() for cell in cells]
        errors = [cell.sem()*1.96 for cell in cells]
        bar_ax.bar(positions+slot*bar_width-bar_width, heights, bar_width, yerr=errors,
                   capsize=3, color=TOPIC_COLORS[topic], alpha=0.8, label=topic,
                   edgecolor='white')

    bar_ax.axhline(0, color='gray', lw=0.8)
    bar_ax.set_xticks(positions)
    bar_ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    bar_ax.set_ylabel('Score Gain (%)')
    bar_ax.legend(title='Topic', fontsize=9)
    bar_ax.set_title('Gain by Medium and Topic')

    fig.suptitle('Medium x Topic Interaction', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'B4_tutoring_medium_topic.png')
    plt.close(fig)
def plot_B5_tutoring_dashboard(paired):
    """Render the six-panel tutoring dashboard (B5).

    Panels: A) pre/post scores, B) pre/post confidence, C) score gains with
    effect sizes, D) gain box plots, E) share of participants who improved,
    stayed the same or declined, F) summary table. All panels are split by
    medium; error bars are 1.96 x SEM. Saved as ``B5_tutoring_dashboard.png``.

    Args:
        paired: Output of ``build_paired_tutoring``.
    """
    def stars(p_value, not_significant):
        # Significance marker; NaN p-values fall through to not_significant.
        if p_value < .001:
            return '***'
        if p_value < .01:
            return '**'
        if p_value < .05:
            return '*'
        return not_significant

    by_medium = {medium: paired[paired['Medium'] == medium] for medium in MEDIUM_ORDER}
    medium_palette = [MEDIUM_COLORS[medium] for medium in MEDIUM_ORDER]

    fig = plt.figure(figsize=(18, 10))
    grid = fig.add_gridspec(2, 3, hspace=0.35, wspace=0.3)
    x = np.arange(3)
    w = 0.35

    def pre_post_bars(ax, pre_col, post_col, pre_face, pre_edge):
        # Side-by-side pre/post bars per medium.
        pre_mean = [by_medium[m][pre_col].mean() for m in MEDIUM_ORDER]
        post_mean = [by_medium[m][post_col].mean() for m in MEDIUM_ORDER]
        pre_err = [by_medium[m][pre_col].sem()*1.96 for m in MEDIUM_ORDER]
        post_err = [by_medium[m][post_col].sem()*1.96 for m in MEDIUM_ORDER]
        ax.bar(x-w/2, pre_mean, w, yerr=pre_err, capsize=4, color=pre_face,
               edgecolor=pre_edge, lw=1.5, label='Pre')
        ax.bar(x+w/2, post_mean, w, yerr=post_err, capsize=4, color=medium_palette,
               alpha=0.8, edgecolor='white', lw=1.5, label='Post')
        ax.set_xticks(x)
        ax.set_xticklabels(MEDIUM_ORDER)

    # A) Absolute scores
    ax = fig.add_subplot(grid[0, 0])
    pre_post_bars(ax, 'Pre_Score', 'Post_Score', '#BBDEFB', '#1976D2')
    ax.set_ylabel('Score (%)')
    ax.set_ylim(40, 100)
    ax.legend(fontsize=9)
    ax.set_title('A) Absolute Scores', fontweight='bold')

    # B) Absolute confidence
    ax = fig.add_subplot(grid[0, 1])
    pre_post_bars(ax, 'Pre_Conf', 'Post_Conf', '#FFE0B2', '#E65100')
    ax.set_ylabel('Confidence (1-7)')
    ax.set_ylim(1, 7)
    ax.legend(fontsize=9)
    ax.set_title('B) Absolute Confidence', fontweight='bold')

    # C) Gains + effect sizes
    ax = fig.add_subplot(grid[0, 2])
    for pos, medium in enumerate(MEDIUM_ORDER):
        rows = by_medium[medium]
        gain, gain_sem = rows['Score_Gain'].mean(), rows['Score_Gain'].sem()
        _, p_value = stats.ttest_rel(rows['Pre_Score'], rows['Post_Score'])
        effect = cohens_d(rows['Pre_Score'], rows['Post_Score'])
        ax.bar(pos, gain, color=MEDIUM_COLORS[medium], alpha=0.7, yerr=gain_sem*1.96,
               capsize=5, width=0.6)
        marker = stars(p_value, 'n.s.')
        ax.text(pos, gain+gain_sem*1.96+1.5, f'{gain:+.1f}%\nd={effect:.2f} {marker}',
                ha='center', fontsize=10, fontweight='bold')
    ax.axhline(0, color='gray', lw=1)
    ax.set_xticks(range(3))
    ax.set_xticklabels(MEDIUM_ORDER)
    ax.set_ylabel('Score Gain (%)')
    ax.set_title('C) Gains + Effect Sizes', fontweight='bold')

    # D) Gain distributions
    ax = fig.add_subplot(grid[1, 0])
    for pos, medium in enumerate(MEDIUM_ORDER):
        box = ax.boxplot(by_medium[medium]['Score_Gain'], positions=[pos], widths=0.5,
                         patch_artist=True, showmeans=True,
                         meanprops=dict(marker='D', markerfacecolor='black', markersize=6))
        patch = box['boxes'][0]
        patch.set_facecolor(MEDIUM_COLORS[medium])
        patch.set_alpha(0.5)
    ax.axhline(0, color='gray', lw=0.8, ls='--')
    ax.set_xticks(range(3))
    ax.set_xticklabels(MEDIUM_ORDER)
    ax.set_ylabel('Score Gain (%)')
    ax.set_title('D) Gain Distributions', fontweight='bold')

    # E) Improved/same/declined
    ax = fig.add_subplot(grid[1, 1])
    outcome_colors = ['#43A047', '#9E9E9E', '#E53935']
    for pos, medium in enumerate(MEDIUM_ORDER):
        gains = by_medium[medium]['Score_Gain']
        improved = (gains > 0).sum()
        unchanged = (gains == 0).sum()
        declined = (gains < 0).sum()
        total = len(by_medium[medium])
        ax.barh([pos-0.15, pos, pos+0.15],
                [improved/total*100, unchanged/total*100, declined/total*100],
                height=0.12, color=outcome_colors, alpha=0.8)
        ax.text(improved/total*100+1, pos-0.15, f'{improved}/{total}', va='center', fontsize=9)
    ax.set_yticks(range(3))
    ax.set_yticklabels(MEDIUM_ORDER)
    ax.set_xlabel('% of Participants')
    ax.legend([mpatches.Patch(color=shade) for shade in outcome_colors],
              ['Improved', 'Same', 'Declined'], fontsize=8, loc='lower right')
    ax.set_title('E) Improved / Same / Declined', fontweight='bold')

    # F) Stats table
    ax = fig.add_subplot(grid[1, 2])
    ax.axis('off')
    table_rows = []
    for medium in MEDIUM_ORDER:
        rows = by_medium[medium]
        gain = rows['Score_Gain'].mean()
        _, p_value = stats.ttest_rel(rows['Pre_Score'], rows['Post_Score'])
        effect = cohens_d(rows['Pre_Score'], rows['Post_Score'])
        count = len(rows)
        marker = stars(p_value, '')
        table_rows.append([medium, str(count), f'{rows["Pre_Score"].mean():.1f}',
                           f'{rows["Post_Score"].mean():.1f}', f'{gain:+.1f}',
                           f'{effect:.2f}', f'{p_value:.3f}{marker}'])
    table = ax.table(cellText=table_rows,
                     colLabels=['Medium', 'N', 'Pre M', 'Post M', 'Gain', "Cohen's d", 'p-value'],
                     loc='center', cellLoc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1.0, 1.8)
    # Row 0 is the header; the first cell of each body row carries the medium colour.
    for col in range(7):
        table[0, col].set_facecolor('#E0E0E0')
        table[0, col].set_text_props(fontweight='bold')
    for row_idx, medium in enumerate(MEDIUM_ORDER):
        table[row_idx+1, 0].set_facecolor(MEDIUM_COLORS[medium])
        table[row_idx+1, 0].set_text_props(color='white', fontweight='bold')
    ax.set_title('F) Statistical Summary', fontweight='bold', pad=20)

    fig.suptitle('Tutoring Effectiveness Dashboard', fontsize=16, fontweight='bold')
    fig.savefig(PLOT_DIR / 'B5_tutoring_dashboard.png')
    plt.close(fig)
# =============================================================================
# C. START-TO-FINISH GAINS
# =============================================================================
def plot_C1_start_to_finish(df):
    """Compare Pre-Reading scores with Post-Tutoring scores (C1).

    Left: individual start-to-end lines coloured by medium plus per-medium
    mean lines. Right: histogram of the total gain per medium. Saved as
    ``C1_start_to_finish.png``.

    Args:
        df: Score table with ``Zeitpunkt``, ``Participant``, ``Topic``,
            ``Medium`` and ``Score_Pct`` columns.
    """
    keys = ['Participant', 'Topic', 'Medium']

    def scores_at(zeitpunkt, name):
        # Scores of one phase with the score column renamed to `name`.
        rows = df[df['Zeitpunkt'] == zeitpunkt][keys + ['Score_Pct']].copy()
        rows.columns = keys + [name]
        return rows

    span = scores_at('Pre-Reading', 'Start').merge(scores_at('Post-Tutoring', 'End'), on=keys)
    span['Gain'] = span['End'] - span['Start']

    fig, (line_ax, hist_ax) = plt.subplots(1, 2, figsize=(14, 7))

    for _, rec in span.iterrows():
        line_ax.plot([0, 1], [rec['Start'], rec['End']],
                     color=MEDIUM_COLORS[rec['Medium']], alpha=0.25, lw=1)
    for medium in MEDIUM_ORDER:
        rows = span[span['Medium'] == medium]
        start_mean, end_mean = rows['Start'].mean(), rows['End'].mean()
        line_ax.plot([0, 1], [start_mean, end_mean], color=MEDIUM_COLORS[medium], lw=3.5,
                     marker='o', ms=10, label=f'{medium} ({end_mean-start_mean:+.1f}%)',
                     zorder=10)
    line_ax.set_xticks([0, 1])
    line_ax.set_xticklabels(['Pre-Reading\n(Start)', 'Post-Tutoring\n(End)'], fontsize=12)
    line_ax.set_ylabel('Test Score (%)')
    line_ax.set_ylim(0, 105)
    line_ax.legend(title='Medium (total gain)', loc='lower right')
    line_ax.set_title('Score Trajectory')

    for medium in MEDIUM_ORDER:
        rows = span[span['Medium'] == medium]
        hist_ax.hist(rows['Gain'], bins=10, alpha=0.5, color=MEDIUM_COLORS[medium],
                     label=f'{medium} (M={rows["Gain"].mean():.1f}%)', edgecolor='white')
    hist_ax.axvline(0, color='gray', lw=1, ls='--')
    hist_ax.set_xlabel('Total Gain (%)')
    hist_ax.set_ylabel('Count')
    hist_ax.legend(title='Medium')
    hist_ax.set_title('Gain Distribution')

    fig.suptitle('Start to Finish: Pre-Reading to Post-Tutoring', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'C1_start_to_finish.png')
    plt.close(fig)
def plot_C2_learning_gains(df):
    """Summarise reading, tutoring and total score gains by medium (C2).

    Panels: scatter of reading gain vs tutoring gain, grouped bars of the mean
    reading and tutoring gain per medium, and bars of the mean total gain.
    Error bars are 1.96 x SEM. Saved as ``C2_learning_gains.png``.

    Args:
        df: Score table with ``Participant``, ``Topic``, ``Medium``,
            ``Zeitpunkt`` and ``Score_Pct`` columns.
    """
    wide = df.pivot_table(index=['Participant', 'Topic', 'Medium'], columns='Zeitpunkt',
                          values='Score_Pct').reset_index()

    def change(later, earlier):
        # A phase column missing from the pivot counts as 0.
        return wide.get(later, 0) - wide.get(earlier, 0)

    gains = pd.DataFrame({
        'Medium': wide['Medium'],
        'Reading': change('Post-Reading', 'Pre-Reading'),
        'Tutoring': change('Post-Tutoring', 'Pre-Tutoring'),
        'Total': change('Post-Tutoring', 'Pre-Reading'),
    })

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # Scatter
    ax = axes[0]
    for medium in MEDIUM_ORDER:
        rows = gains[gains['Medium'] == medium]
        ax.scatter(rows['Reading'], rows['Tutoring'], color=MEDIUM_COLORS[medium], s=60,
                   alpha=0.7, edgecolors='white', lw=0.5, label=medium)
    ax.axhline(0, color='gray', lw=0.8, alpha=0.5)
    ax.axvline(0, color='gray', lw=0.8, alpha=0.5)
    ax.set_xlabel('Reading Gain (%)')
    ax.set_ylabel('Tutoring Gain (%)')
    ax.legend(title='Medium')
    ax.set_title('Reading vs Tutoring')

    # Bar
    ax = axes[1]
    phase_stats = gains.groupby('Medium')[['Reading', 'Tutoring']].agg(['mean', 'sem'])
    positions = np.arange(3)
    bar_width = 0.35
    series = [('Reading', '#1976D2', 'Reading'), ('Tutoring', '#E65100', 'Tutoring')]
    for slot, (column, color, legend_label) in enumerate(series):
        heights = [phase_stats.loc[medium, (column, 'mean')] for medium in MEDIUM_ORDER]
        errors = [phase_stats.loc[medium, (column, 'sem')]*1.96 for medium in MEDIUM_ORDER]
        bars = ax.bar(positions+slot*bar_width-bar_width/2, heights, bar_width, yerr=errors,
                      color=color, alpha=0.8, capsize=4, label=legend_label)
        for bar, value in zip(bars, heights):
            ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+1, f'{value:.1f}',
                    ha='center', fontsize=9)
    ax.set_xticks(positions)
    ax.set_xticklabels(MEDIUM_ORDER)
    ax.set_ylabel('Score Gain (%)')
    ax.axhline(0, color='gray', lw=0.8)
    ax.legend()
    ax.set_title('Mean Gains by Medium')

    # Total
    ax = axes[2]
    total_stats = gains.groupby('Medium')['Total'].agg(['mean', 'sem'])
    bars = ax.bar(MEDIUM_ORDER,
                  [total_stats.loc[medium, 'mean'] for medium in MEDIUM_ORDER],
                  color=[MEDIUM_COLORS[medium] for medium in MEDIUM_ORDER], alpha=0.8,
                  yerr=[total_stats.loc[medium, 'sem']*1.96 for medium in MEDIUM_ORDER],
                  capsize=5)
    for bar, medium in zip(bars, MEDIUM_ORDER):
        ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+1,
                f'{total_stats.loc[medium,"mean"]:.1f}%', ha='center', fontsize=10,
                fontweight='bold')
    ax.set_ylabel('Total Gain (%)')
    ax.axhline(0, color='gray', lw=0.8)
    ax.set_title('Total Learning Gain')

    fig.suptitle('Learning Gains Overview', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'C2_learning_gains.png')
    plt.close(fig)
# =============================================================================
# D. CONFIDENCE ANALYSIS
# =============================================================================
def plot_D1_confidence_vs_score(df):
    """Scatter confidence against test score: overall, by phase and by medium (D1).

    The overall panel adds a least-squares line and the Pearson r when more
    than two complete observations exist. Saved as
    ``D1_confidence_vs_score.png``.

    Args:
        df: Score table with ``Score_Pct``, ``Avg_Confidence``, ``Zeitpunkt``
            and ``Medium`` columns.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)

    # Overall
    ax = axes[0]
    ax.scatter(df['Score_Pct'], df['Avg_Confidence'], alpha=0.4, s=40, c='#546E7A',
               edgecolors='white', lw=0.5)
    # Fit and correlate only on rows where both values are present.
    complete = df[['Score_Pct', 'Avg_Confidence']].dropna()
    xr, yr = complete['Score_Pct'], complete['Avg_Confidence']
    if len(xr) > 2:
        z = np.polyfit(xr, yr, 1)
        xl = np.linspace(xr.min(), xr.max(), 100)
        ax.plot(xl, np.poly1d(z)(xl), 'r-', lw=2, alpha=0.8)
        r = np.corrcoef(xr, yr)[0, 1]
        ax.text(0.05, 0.95, f'r = {r:.3f}', transform=ax.transAxes, fontsize=12, va='top',
                fontweight='bold', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    ax.set_xlabel('Test Score (%)')
    ax.set_ylabel('Avg Confidence (1-7)')
    ax.set_title('Overall')

    # By phase
    ax = axes[1]
    phase_colors = ['#E8EAF6', '#C5CAE9', '#7986CB', '#3F51B5']
    for phase, c in zip(PHASE_ORDER, phase_colors):
        sub = df[df['Zeitpunkt'] == phase]
        # color=c applies the per-phase palette; it was zipped in but never
        # passed on, so the default colour cycle was used instead.
        ax.scatter(sub['Score_Pct'], sub['Avg_Confidence'], alpha=0.5, s=40, color=c,
                   label=PHASE_SHORT[phase], edgecolors='white', lw=0.5)
    ax.legend(fontsize=9, title='Phase')
    ax.set_xlabel('Test Score (%)')
    ax.set_title('By Phase')

    # By medium
    ax = axes[2]
    for m in MEDIUM_ORDER:
        sub = df[df['Medium'] == m]
        ax.scatter(sub['Score_Pct'], sub['Avg_Confidence'], alpha=0.5, s=40,
                   color=MEDIUM_COLORS[m], label=m, edgecolors='white', lw=0.5)
    ax.legend(fontsize=9, title='Medium')
    ax.set_xlabel('Test Score (%)')
    ax.set_title('By Medium')

    fig.suptitle('Confidence vs Test Score', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'D1_confidence_vs_score.png')
    plt.close(fig)
def plot_D2_delta_conf_vs_score(df):
    """Scatter confidence change against score change for each phase span (D2).

    One panel each for the reading phase, the tutoring phase and the total
    span. Only participant/topic/medium combinations with all six changes
    available are plotted. Each panel gets a least-squares line and Pearson r
    when more than two points exist. Saved as ``D2_delta_conf_vs_score.png``.

    Args:
        df: Score table with ``Participant``, ``Topic``, ``Medium``,
            ``Zeitpunkt``, ``Score_Pct`` and ``Avg_Confidence`` columns.
    """
    keys = ['Participant', 'Topic', 'Medium']
    score_wide = df.pivot_table(index=keys, columns='Zeitpunkt', values='Score_Pct')
    conf_wide = df.pivot_table(index=keys, columns='Zeitpunkt', values='Avg_Confidence')

    def change(table, later, earlier):
        # A phase column missing from the pivot counts as 0.
        return table.get(later, 0) - table.get(earlier, 0)

    spans = {
        'R': ('Post-Reading', 'Pre-Reading'),
        'T': ('Post-Tutoring', 'Pre-Tutoring'),
        'A': ('Post-Tutoring', 'Pre-Reading'),
    }
    columns = {}
    for prefix, (later, earlier) in spans.items():
        columns[f'{prefix}_S'] = change(score_wide, later, earlier)
        columns[f'{prefix}_C'] = change(conf_wide, later, earlier)
    deltas = pd.DataFrame(columns).reset_index().dropna()

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    panels = [('R_S', 'R_C', 'Reading Phase'), ('T_S', 'T_C', 'Tutoring Phase'),
              ('A_S', 'A_C', 'Total')]
    for ax, (score_col, conf_col, title) in zip(axes, panels):
        for medium in MEDIUM_ORDER:
            rows = deltas[deltas['Medium'] == medium]
            ax.scatter(rows[score_col], rows[conf_col], color=MEDIUM_COLORS[medium], s=50,
                       alpha=0.6, edgecolors='white', label=medium)

        xv = deltas[score_col].values
        yv = deltas[conf_col].values
        if len(xv) > 2:
            fit = np.polyfit(xv, yv, 1)
            grid = np.linspace(xv.min(), xv.max(), 100)
            ax.plot(grid, np.poly1d(fit)(grid), 'r-', lw=1.5, alpha=0.7)
            r = np.corrcoef(xv, yv)[0, 1]
            ax.text(0.05, 0.95, f'r = {r:.3f}', transform=ax.transAxes, fontsize=11,
                    va='top', fontweight='bold',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        ax.axhline(0, color='gray', lw=0.8, alpha=0.5)
        ax.axvline(0, color='gray', lw=0.8, alpha=0.5)
        ax.set_xlabel('Score Change (%)')
        ax.set_ylabel('Confidence Change')
        ax.set_title(title)
        ax.legend(title='Medium', fontsize=8)

    fig.suptitle('Do Changes in Confidence Track Changes in Score?', fontsize=14,
                 fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'D2_delta_conf_vs_score.png')
    plt.close(fig)
def plot_D3_calibration(df):
    """Plot how mean test score varies with stated confidence (D3).

    Left: mean score (1.96 x SEM error bars) per rounded confidence rating,
    limited to ratings with at least three observations. Right: mean score
    per low/medium/high confidence level, one line per phase; phases with
    fewer than five observations are skipped. Saved as ``D3_calibration.png``.

    Args:
        df: Score table with ``Score_Pct``, ``Avg_Confidence`` and
            ``Zeitpunkt`` columns.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    dc = df[['Score_Pct', 'Avg_Confidence', 'Zeitpunkt']].dropna().copy()

    # Overall calibration: bin the average confidence to the nearest rating.
    dc['Bin'] = pd.cut(dc['Avg_Confidence'], bins=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5],
                       labels=list('1234567'))
    cal = dc.groupby('Bin', observed=True)['Score_Pct'].agg(['mean', 'sem', 'count'])
    cal = cal[cal['count'] >= 3]
    ax1.bar(cal.index.astype(str), cal['mean'], yerr=cal['sem']*1.96, capsize=4,
            color='#5C6BC0', alpha=0.8, edgecolor='white')
    for idx, row in cal.iterrows():
        ax1.text(idx, row['mean']+2, f'n={int(row["count"])}', ha='center', fontsize=8,
                 color='gray')
    ax1.set_xlabel('Confidence Rating')
    ax1.set_ylabel('Mean Test Score (%)')
    ax1.set_title('Overall Calibration')

    # Calibration by phase.
    pcol = {'Pre-Reading': '#E8EAF6', 'Post-Reading': '#9FA8DA',
            'Pre-Tutoring': '#5C6BC0', 'Post-Tutoring': '#283593'}
    level_labels = ['Low (1-2)', 'Med (3-4)', 'High (5-7)']
    for phase in PHASE_ORDER:
        sub = dc[dc['Zeitpunkt'] == phase]
        if len(sub) < 5:
            continue
        bins = pd.cut(sub['Avg_Confidence'], bins=[0.5, 2.5, 4.5, 7.5], labels=level_labels)
        # Reindex to the full level list. Matplotlib orders string categories
        # by first appearance, so a phase with a missing level would otherwise
        # put the x-axis out of Low/Med/High order. Missing levels plot as gaps.
        ms = sub.groupby(bins, observed=True)['Score_Pct'].mean().reindex(level_labels)
        ax2.plot(level_labels, ms.values, marker='o', lw=2, ms=8, color=pcol[phase],
                 label=PHASE_SHORT[phase])
    ax2.set_xlabel('Confidence Level')
    ax2.set_ylabel('Mean Test Score (%)')
    ax2.legend(title='Phase')
    ax2.set_title('Calibration by Phase')

    fig.suptitle('Confidence Calibration', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'D3_calibration.png')
    plt.close(fig)
# =============================================================================
# E. PERSONALITY CORRELATIONS
# =============================================================================
def plot_E1_personality_correlations(merged):
    """Draw a heatmap of Pearson correlations between traits and outcomes (E1).

    A cell is computed only when more than three complete pairs exist;
    otherwise it is NaN. Cells with p < .05 get ``*`` and p < .01 get ``**``.
    Saved as ``E1_personality_correlations.png``.

    Args:
        merged: Per-participant table with one column per trait in
            ``TRAIT_ORDER`` and the ``Mean_*`` outcome columns listed below.
    """
    outcomes = ['Mean_Score_Gain', 'Mean_Conf_Gain', 'Mean_Total_Gain', 'Mean_Pre_Score',
                'Mean_Post_Score', 'Mean_Pre_Conf', 'Mean_Post_Conf']
    labels = ['Tutor\nScore Gain', 'Tutor\nConf Gain', 'Total\nGain', 'Pre-Tutor\nScore',
              'Post-Tutor\nScore', 'Pre-Tutor\nConf', 'Post-Tutor\nConf']

    corr = np.zeros((5, 7))
    pvals = np.zeros_like(corr)
    for row_idx, trait in enumerate(TRAIT_ORDER):
        trait_values = merged[trait].values
        for col_idx, outcome in enumerate(outcomes):
            outcome_values = merged[outcome].values
            usable = ~(np.isnan(trait_values) | np.isnan(outcome_values))
            if usable.sum() > 3:
                corr[row_idx, col_idx], pvals[row_idx, col_idx] = stats.pearsonr(
                    trait_values[usable], outcome_values[usable])
            else:
                # Too few pairs: blank cell, and p = 1 so it never gets a star.
                corr[row_idx, col_idx] = np.nan
                pvals[row_idx, col_idx] = 1

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.heatmap(pd.DataFrame(corr, index=TRAIT_ORDER, columns=labels), annot=True, fmt='.2f',
                cmap='RdBu_r', center=0, vmin=-0.7, vmax=0.7, ax=ax, linewidths=1,
                cbar_kws={'label': 'Pearson r'})

    # Significance stars sit slightly below the centre of each cell.
    for row_idx, col_idx in np.ndindex(5, 7):
        p_value = pvals[row_idx, col_idx]
        if p_value < .01:
            star = '**'
        elif p_value < .05:
            star = '*'
        else:
            continue
        ax.text(col_idx+0.5, row_idx+0.75, star, ha='center', va='center', fontsize=12,
                fontweight='bold', color='black')

    ax.set_title('Big Five Traits vs Tutoring Outcomes (* p<.05, ** p<.01)', fontsize=13,
                 fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'E1_personality_correlations.png')
    plt.close(fig)
def plot_E2_trait_vs_score_gain(merged):
    """Scatter each Big Five trait against the mean tutoring score gain.

    Produces a row of five panels sharing the y-axis, one per entry of
    ``TRAIT_ORDER``. Every point is labelled with its ``Participant`` value.
    When a panel has more than three complete (trait, gain) pairs, a
    first-degree least-squares line and a box with Pearson r and p are added
    (``*`` appended when p < .05). The figure is written to
    ``PLOT_DIR / 'E2_trait_vs_score_gain.png'``.

    Args:
        merged: DataFrame with a ``Participant`` column, a
            ``Mean_Score_Gain`` column and one column per trait.
    """
    fig, axes = plt.subplots(1, 5, figsize=(22, 5), sharey=True)

    # The outcome vector is the same for every panel.
    gain_all = merged['Mean_Score_Gain'].values
    participant_all = merged['Participant'].values

    for ax, trait in zip(axes, TRAIT_ORDER):
        colour = TRAIT_COLORS[trait]
        trait_all = merged[trait].values

        # Keep only rows where both the trait and the gain are present.
        complete = ~(np.isnan(trait_all) | np.isnan(gain_all))
        trait_vals = trait_all[complete]
        gain_vals = gain_all[complete]

        ax.scatter(trait_vals, gain_vals, s=60, color=colour, alpha=0.7,
                   edgecolors='white', lw=0.5)

        # Label every plotted point with its participant identifier.
        for who, tx, gy in zip(participant_all[complete], trait_vals, gain_vals):
            ax.annotate(who, (tx, gy), fontsize=7, alpha=0.5,
                        textcoords="offset points", xytext=(3, 3))

        if complete.sum() > 3:
            r, p = stats.pearsonr(trait_vals, gain_vals)
            coeffs = np.polyfit(trait_vals, gain_vals, 1)
            grid = np.linspace(trait_vals.min(), trait_vals.max(), 100)
            ax.plot(grid, np.poly1d(coeffs)(grid), color=colour, lw=2, alpha=0.6)
            sig = '' if not p < .05 else '*'
            ax.text(0.05, 0.95, f'r={r:.2f} p={p:.3f}{sig}',
                    transform=ax.transAxes, fontsize=10, va='top',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        ax.axhline(0, color='gray', lw=0.5, alpha=0.5)
        ax.set_xlabel(trait, fontsize=11, fontweight='bold', color=colour)
        ax.set_xlim(1, 7)

    axes[0].set_ylabel('Mean Tutoring Score Gain (%)', fontsize=11)
    fig.suptitle('Big Five Traits vs Tutoring Score Gains', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'E2_trait_vs_score_gain.png')
    plt.close(fig)
# =============================================================================
# STATS EXPORT
# =============================================================================
def export_stats(df, paired):
    """Write the summary statistics tables as CSV files into ``STATS_DIR``.

    Files written (all with three-decimal floats):
        overall_trajectory.csv, start_to_finish.csv, tutoring_by_medium.csv,
        tutoring_by_topic.csv, participant_summary.csv,
        tutoring_by_medium_topic.csv

    Args:
        df: DataFrame of test results. Columns read here: ``Participant``,
            ``Topic``, ``Medium``, ``Zeitpunkt``, ``Score_Pct``,
            ``Avg_Confidence``.
        paired: DataFrame of pre/post tutoring pairs. Columns read here:
            ``Participant``, ``Topic``, ``Medium``, ``Pre_Score``,
            ``Post_Score``, ``Score_Gain``, ``Pre_Conf``, ``Post_Conf``,
            ``Conf_Gain``.

    Note:
        The paired t-tests are called as ``ttest_rel(pre, post)``, so the
        reported t statistic is negative when the post value is higher.
    """
    nan = float('nan')
    # Hoisted: the topic list is needed by two separate sections below.
    topics = df['Topic'].unique()

    # --- Overall trajectory ---
    phase_means = df.groupby('Zeitpunkt', observed=True).agg(
        Mean_Score=('Score_Pct', 'mean'),
        SEM_Score=('Score_Pct', 'sem'),
        Mean_Confidence=('Avg_Confidence', 'mean'),
        SEM_Confidence=('Avg_Confidence', 'sem'),
    ).reindex(PHASE_ORDER)
    phase_means.index.name = 'Phase'
    phase_means.to_csv(STATS_DIR / 'overall_trajectory.csv', float_format='%.3f')

    # --- Start-to-finish gain ---
    key_cols = ['Participant', 'Topic', 'Medium']
    pre_r = df[df['Zeitpunkt'] == 'Pre-Reading'][key_cols + ['Score_Pct']].copy()
    pre_r.columns = key_cols + ['Start']
    post_t = df[df['Zeitpunkt'] == 'Post-Tutoring'][key_cols + ['Score_Pct']].copy()
    post_t.columns = key_cols + ['End']
    sf = pre_r.merge(post_t, on=key_cols)
    sf['Gain'] = sf['End'] - sf['Start']
    # Use one NaN-free series for N, mean, SD and the t-test so the reported
    # sample size matches the sample the statistics were computed on.
    gains = sf['Gain'].dropna()
    t_sf, p_sf = stats.ttest_1samp(gains, 0)
    sf_summary = pd.DataFrame([{
        'Metric': 'Pre-Reading to Post-Tutoring',
        'N': len(gains),
        'Gain_Mean': gains.mean(),
        'Gain_SD': gains.std(),
        't_stat': t_sf,
        'p_value': p_sf,
    }])
    sf_summary.to_csv(STATS_DIR / 'start_to_finish.csv', index=False, float_format='%.3f')

    # --- Tutoring stats by medium ---
    med_rows = []
    for m in MEDIUM_ORDER:
        sub = paired[paired['Medium'] == m]
        t_val, p_val = stats.ttest_rel(sub['Pre_Score'], sub['Post_Score'])
        d_score = cohens_d(sub['Pre_Score'], sub['Post_Score'])
        t_c, p_c = stats.ttest_rel(sub['Pre_Conf'], sub['Post_Conf'])
        d_conf = cohens_d(sub['Pre_Conf'], sub['Post_Conf'])
        # Averages over every test taken in this medium, not just the pairs.
        all_m = df[df['Medium'] == m]
        med_rows.append({
            'Medium': m,
            'N_pairs': len(sub),
            'Avg_Score_Mean': all_m['Score_Pct'].mean(),
            'Avg_Score_SD': all_m['Score_Pct'].std(),
            'Avg_Conf_Mean': all_m['Avg_Confidence'].mean(),
            'Avg_Conf_SD': all_m['Avg_Confidence'].std(),
            'Pre_Score_Mean': sub['Pre_Score'].mean(),
            'Post_Score_Mean': sub['Post_Score'].mean(),
            'Score_Gain_Mean': sub['Score_Gain'].mean(),
            'Score_Gain_SD': sub['Score_Gain'].std(),
            'Score_Cohens_d': d_score,
            'Score_t': t_val,
            'Score_p': p_val,
            'Pre_Conf_Mean': sub['Pre_Conf'].mean(),
            'Post_Conf_Mean': sub['Post_Conf'].mean(),
            'Conf_Gain_Mean': sub['Conf_Gain'].mean(),
            'Conf_Gain_SD': sub['Conf_Gain'].std(),
            'Conf_Cohens_d': d_conf,
            'Conf_t': t_c,
            'Conf_p': p_c,
        })
    pd.DataFrame(med_rows).to_csv(
        STATS_DIR / 'tutoring_by_medium.csv', index=False, float_format='%.3f')

    # --- Tutoring stats by topic ---
    topic_rows = []
    for topic in topics:
        sub_t = paired[paired['Topic'] == topic]
        all_t = df[df['Topic'] == topic]
        t_val, p_val = stats.ttest_rel(sub_t['Pre_Score'], sub_t['Post_Score'])
        d_score = cohens_d(sub_t['Pre_Score'], sub_t['Post_Score'])
        topic_rows.append({
            'Topic': topic,
            'N_pairs': len(sub_t),
            'Avg_Score_Mean': all_t['Score_Pct'].mean(),
            'Avg_Score_SD': all_t['Score_Pct'].std(),
            'Avg_Conf_Mean': all_t['Avg_Confidence'].mean(),
            'Avg_Conf_SD': all_t['Avg_Confidence'].std(),
            'Score_Gain_Mean': sub_t['Score_Gain'].mean(),
            'Score_Gain_SD': sub_t['Score_Gain'].std(),
            'Score_Cohens_d': d_score,
            'Score_t': t_val,
            'Score_p': p_val,
        })
    pd.DataFrame(topic_rows).to_csv(
        STATS_DIR / 'tutoring_by_topic.csv', index=False, float_format='%.3f')

    # --- Participant summary ---
    part_rows = []
    # Sort numerically on everything after the first character of the ID
    # (assumes IDs look like a one-letter prefix plus digits -- TODO confirm).
    for pid in sorted(df['Participant'].unique(), key=lambda x: int(x[1:])):
        sub_df = df[df['Participant'] == pid]
        sub_p = paired[paired['Participant'] == pid]
        phases = (sub_df.groupby('Zeitpunkt', observed=True)['Score_Pct']
                  .mean().reindex(PHASE_ORDER))
        pre_read = phases.get('Pre-Reading', nan)
        post_read = phases.get('Post-Reading', nan)
        part_rows.append({
            'Participant': pid,
            'N_Tests': len(sub_df),
            'Avg_Score_Mean': sub_df['Score_Pct'].mean(),
            'Avg_Conf_Mean': sub_df['Avg_Confidence'].mean(),
            'Pre_Reading': pre_read,
            'Post_Reading': post_read,
            'Pre_Tutoring': phases.get('Pre-Tutoring', nan),
            'Post_Tutoring': phases.get('Post-Tutoring', nan),
            'Reading_Gain': post_read - pre_read,
            'Tutoring_Gain': sub_p['Score_Gain'].mean() if len(sub_p) else nan,
        })
    pd.DataFrame(part_rows).to_csv(
        STATS_DIR / 'participant_summary.csv', index=False, float_format='%.3f')

    # --- Tutoring gain by medium × topic ---
    mt_rows = []
    for m in MEDIUM_ORDER:
        for topic in topics:
            sub = paired[(paired['Medium'] == m) & (paired['Topic'] == topic)]
            has_rows = len(sub) > 0
            mt_rows.append({
                'Medium': m,
                'Topic': topic,
                'N': len(sub),
                'Score_Gain_Mean': sub['Score_Gain'].mean() if has_rows else nan,
                'Score_Gain_SD': sub['Score_Gain'].std() if has_rows else nan,
                'Score_Gain_SEM': sub['Score_Gain'].sem() if has_rows else nan,
            })
    pd.DataFrame(mt_rows).to_csv(
        STATS_DIR / 'tutoring_by_medium_topic.csv', index=False, float_format='%.3f')

    print(f" Stats exported to: {STATS_DIR}")
# =============================================================================
# MAIN
# =============================================================================
def main():
    """Load the data, render every plot section, then export the statistics.

    Builds a per-participant table (``merged``) that joins the mean tutoring
    outcomes, the mean Pre-Reading to Post-Tutoring gain, and the personality
    data, and passes it to the section E plots. Progress is printed to stdout.
    """
    print("Loading data...")
    df = load_data()
    paired = build_paired_tutoring(df)
    personality = load_personality()

    # Build merged for personality analysis
    p_agg = paired.groupby('Participant').agg(
        Mean_Score_Gain=('Score_Gain', 'mean'), Mean_Conf_Gain=('Conf_Gain', 'mean'),
        Mean_Pre_Score=('Pre_Score', 'mean'), Mean_Post_Score=('Post_Score', 'mean'),
        Mean_Pre_Conf=('Pre_Conf', 'mean'), Mean_Post_Conf=('Post_Conf', 'mean'),
    ).reset_index()

    # Total gain: Post-Tutoring score minus Pre-Reading score, matched on
    # participant, topic and medium, then averaged per participant.
    pre_r = df[df['Zeitpunkt'] == 'Pre-Reading'][['Participant', 'Topic', 'Medium', 'Score_Pct']].copy()
    pre_r.columns = ['Participant', 'Topic', 'Medium', 'PreRead']
    post_t = df[df['Zeitpunkt'] == 'Post-Tutoring'][['Participant', 'Topic', 'Medium', 'Score_Pct']].copy()
    post_t.columns = ['Participant', 'Topic', 'Medium', 'PostTutor']
    total = pre_r.merge(post_t, on=['Participant', 'Topic', 'Medium'])
    total['TotalGain'] = total['PostTutor'] - total['PreRead']
    tg = total.groupby('Participant')['TotalGain'].mean().reset_index()
    tg.columns = ['Participant', 'Mean_Total_Gain']

    # Left join keeps participants without a total gain; the inner join then
    # drops anyone who has no personality record.
    p_agg = p_agg.merge(tg, on='Participant', how='left')
    merged = p_agg.merge(personality, on='Participant', how='inner')

    print(f" {len(df)} test entries, {paired['Participant'].nunique()} participants, "
          f"{len(merged)} with personality data\n")

    # Generate all plots
    sections = [
        ("A. Overall Learning Trajectory", [
            ("A1", "Overall trajectory (score + confidence)", lambda: plot_A1_trajectory(df)),
            ("A2", "Trajectory by medium", lambda: plot_A2_trajectory_by_medium(df)),
            ("A3", "Trajectory by topic", lambda: plot_A3_trajectory_by_topic(df)),
            ("A4", "Participant-level heatmaps", lambda: plot_A4_heatmap(df)),
        ]),
        ("B. Tutoring Phase Deep-Dive", [
            ("B1", "Paired slopes by medium (with stats)", lambda: plot_B1_tutoring_slopes_by_medium(paired)),
            ("B2", "Paired slopes by topic (with stats)", lambda: plot_B2_tutoring_slopes_by_topic(paired)),
            ("B3", "Tutoring gain by medium (effect sizes)", lambda: plot_B3_tutoring_gain_by_medium(paired)),
            ("B4", "Medium x topic interaction", lambda: plot_B4_tutoring_medium_topic(paired)),
            ("B5", "Tutoring effectiveness dashboard", lambda: plot_B5_tutoring_dashboard(paired)),
        ]),
        ("C. Start-to-Finish Gains", [
            ("C1", "Pre-Reading to Post-Tutoring paired", lambda: plot_C1_start_to_finish(df)),
            ("C2", "Learning gains overview", lambda: plot_C2_learning_gains(df)),
        ]),
        ("D. Confidence Analysis", [
            ("D1", "Confidence vs test score scatter", lambda: plot_D1_confidence_vs_score(df)),
            ("D2", "Change in confidence vs change in score", lambda: plot_D2_delta_conf_vs_score(df)),
            ("D3", "Confidence calibration", lambda: plot_D3_calibration(df)),
        ]),
        ("E. Personality Correlations", [
            ("E1", "Big Five vs tutoring outcomes heatmap", lambda: plot_E1_personality_correlations(merged)),
            ("E2", "Trait vs tutoring score gain", lambda: plot_E2_trait_vs_score_gain(merged)),
        ]),
    ]

    for section_name, plots in sections:
        print(section_name)
        for code, desc, fn in plots:
            fn()
            print(f" [{code}] {desc}")

    # Derive the count from the table above so the message cannot go stale
    # when a plot is added or removed.
    n_plots = sum(len(plots) for _, plots in sections)
    print(f"\n{n_plots} plots saved to: {PLOT_DIR}")

    print("\nExporting statistics...")
    export_stats(df, paired)
    print("Done.")


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()