778 lines
45 KiB
Python
778 lines
45 KiB
Python
"""
|
||
generate_plots.py
|
||
|
||
Consolidated visualization script for VirTu-Eval experiment data.
|
||
Generates all plots into Data/plots/ organized by section:
|
||
|
||
A. Overall Learning Trajectory (4 plots)
|
||
B. Tutoring Phase Deep-Dive (5 plots)
|
||
C. Start-to-Finish Gains (2 plots)
|
||
D. Confidence Analysis (3 plots)
|
||
E. Personality Correlations (2 plots)
|
||
|
||
Usage:
|
||
python generate_plots.py
|
||
"""
|
||
|
||
import csv
|
||
import pandas as pd
|
||
import numpy as np
|
||
import matplotlib
|
||
matplotlib.use('Agg')
|
||
import matplotlib.pyplot as plt
|
||
import matplotlib.patches as mpatches
|
||
from matplotlib.lines import Line2D
|
||
import seaborn as sns
|
||
from pathlib import Path
|
||
from io import StringIO
|
||
from scipy import stats
|
||
|
||
# =============================================================================
|
||
# CONFIG
|
||
# =============================================================================
|
||
# Input CSVs are read from, and outputs written below, the Data/ directory
# that sits next to this script.
BASE = Path(__file__).resolve().parent / "Data"
PLOT_DIR = BASE / "plots"
STATS_DIR = BASE / "stats"

# parents=True: without it a missing Data/ directory aborts the script here
# with an error about "Data/plots", which hides the real problem (no input
# data). With it, the failure surfaces later at the CSV read instead.
PLOT_DIR.mkdir(parents=True, exist_ok=True)
STATS_DIR.mkdir(parents=True, exist_ok=True)
|
||
|
||
# Measurement phases in chronological order, with the short labels used on axes.
PHASE_SHORT = {
    'Pre-Reading': 'Pre-Read',
    'Post-Reading': 'Post-Read',
    'Pre-Tutoring': 'Pre-Tutor',
    'Post-Tutoring': 'Post-Tutor',
}
PHASE_ORDER = list(PHASE_SHORT)
PHASE_LABELS = list(PHASE_SHORT.values())

# Tutoring media and their plot colours.
MEDIUM_ORDER = ['Chat', 'Video', 'VR']
MEDIUM_COLORS = {
    'Chat': '#2196F3',
    'Video': '#FF9800',
    'VR': '#4CAF50',
}

# Learning topics and their plot colours.
TOPIC_ORDER = ['Mendel', 'DNA-Replikation', 'Ökologie']
TOPIC_COLORS = {
    'Mendel': '#E91E63',
    'DNA-Replikation': '#9C27B0',
    'Ökologie': '#009688',
}

# Questionnaire item numbers per personality trait; 'reverse' lists the items
# that are reverse-scored when the trait mean is computed.
BFI_TRAITS = {
    trait: {'items': items, 'reverse': reverse}
    for trait, items, reverse in (
        ('Neuroticism', [1, 2, 3], [3]),
        ('Extraversion', [4, 5, 6], [6]),
        ('Openness', [7, 8, 9], []),
        ('Agreeableness', [10, 11, 12], [10]),
        ('Conscientiousness', [13, 14, 15], [14]),
    )
}
TRAIT_ORDER = list(BFI_TRAITS)
TRAIT_COLORS = {
    'Neuroticism': '#E53935',
    'Extraversion': '#FB8C00',
    'Openness': '#43A047',
    'Agreeableness': '#1E88E5',
    'Conscientiousness': '#8E24AA',
}
|
||
|
||
# Global look: seaborn white grid, slightly enlarged fonts, 150 dpi figures,
# and tight bounding boxes on every saved file.
sns.set_theme(style="whitegrid", font_scale=1.05)
plt.rcParams.update({
    'figure.dpi': 150,
    'savefig.bbox': 'tight',
})
|
||
|
||
|
||
def cohens_d(pre, post):
    """Return the paired-samples effect size: mean change over SD of change.

    ``pre`` and ``post`` are subtracted element-wise (``post - pre``). When the
    standard deviation of the differences is not strictly positive (zero, or
    NaN for fewer than two pairs) the function returns 0 instead of dividing.
    """
    change = post - pre
    spread = change.std()
    if spread > 0:
        return change.mean() / spread
    return 0
|
||
|
||
|
||
# =============================================================================
|
||
# DATA LOADING
|
||
# =============================================================================
|
||
|
||
def load_data():
    """Read ``test_scores_all.csv`` from BASE and add the derived phase columns.

    Added/normalised columns:
      * ``Zeitpunkt``   - whitespace-stripped, with 'Pre-Tutor' mapped to
        'Pre-Tutoring'
      * ``Phase``       - ordered categorical over PHASE_ORDER
      * ``Phase_Label`` - ordered categorical over the short PHASE_LABELS
      * ``Phase_Idx``   - position of the phase within PHASE_ORDER
      * ``P_Num``       - first run of digits in ``Participant`` as int
    """
    scores = pd.read_csv(BASE / "test_scores_all.csv", encoding="utf-8-sig")

    # Normalize typo "Pre-Tutor" -> "Pre-Tutoring"
    timepoint = scores['Zeitpunkt'].str.strip().replace('Pre-Tutor', 'Pre-Tutoring')
    scores['Zeitpunkt'] = timepoint

    scores['Phase'] = pd.Categorical(timepoint, categories=PHASE_ORDER, ordered=True)
    short_names = timepoint.map(PHASE_SHORT)
    scores['Phase_Label'] = pd.Categorical(short_names, categories=PHASE_LABELS, ordered=True)

    phase_position = {name: pos for pos, name in enumerate(PHASE_ORDER)}
    scores['Phase_Idx'] = timepoint.map(phase_position)

    scores['P_Num'] = scores['Participant'].str.extract(r'(\d+)').astype(int)
    return scores
|
||
|
||
|
||
def build_paired_tutoring(df):
    """Pair each Pre-Tutoring row with its Post-Tutoring counterpart.

    Rows are matched on (Participant, Topic, Medium) with an inner merge, so
    only combinations present at both time points are returned. The result has
    Pre_/Post_ score and confidence columns, their differences (``Score_Gain``,
    ``Conf_Gain``) and the numeric participant id ``P_Num``.
    """
    keys = ['Participant', 'Topic', 'Medium']
    measured = ['Score_Pct', 'Avg_Confidence']

    before = df.loc[df['Zeitpunkt'] == 'Pre-Tutoring', keys + measured].copy()
    before = before.rename(columns={'Score_Pct': 'Pre_Score', 'Avg_Confidence': 'Pre_Conf'})

    after = df.loc[df['Zeitpunkt'] == 'Post-Tutoring', keys + measured].copy()
    after = after.rename(columns={'Score_Pct': 'Post_Score', 'Avg_Confidence': 'Post_Conf'})

    paired = before.merge(after, on=keys)
    paired['Score_Gain'] = paired['Post_Score'] - paired['Pre_Score']
    paired['Conf_Gain'] = paired['Post_Conf'] - paired['Pre_Conf']
    paired['P_Num'] = paired['Participant'].str.extract(r'(\d+)').astype(int)
    return paired
|
||
|
||
|
||
def load_personality():
    """Load ``Final-Questionnaire.csv`` from BASE and score the BFI traits.

    Layout the code relies on: the first row is a header, columns 1..15 hold
    the questionnaire items, and the last column holds the participant id
    (prefixed with 'P' here when the prefix is missing).

    Items that are missing or not integers are treated as NaN and left out of
    the trait mean. Items listed under ``reverse`` in BFI_TRAITS are scored as
    ``8 - value``. A trait with no usable item becomes NaN.

    Blank rows, rows without a participant id, and an empty file are tolerated
    instead of raising ``IndexError``.

    Returns:
        DataFrame with a ``Participant`` column followed by one column per
        trait in BFI_TRAITS order; empty (but with those columns) when the
        file contains no usable rows.
    """
    path = BASE / "Final-Questionnaire.csv"
    # newline="" is what the csv module expects: it then handles line endings
    # (including newlines inside quoted fields) itself.
    with open(path, encoding="utf-8-sig", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row; harmless on an empty file
        data = list(reader)

    records = []
    for row in data:
        if not row:
            # csv.reader yields [] for a blank line; row[-1] would raise.
            continue
        pid = row[-1].strip()
        if not pid:
            continue
        if not pid.startswith('P'):
            pid = f'P{pid}'

        items = {}
        for i in range(1, 16):
            try:
                items[i] = int(row[i].strip())
            except (ValueError, IndexError):
                items[i] = np.nan

        rec = {'Participant': pid}
        for trait, info in BFI_TRAITS.items():
            vals = [
                8 - items[it] if it in info['reverse'] else items[it]
                for it in info['items']
                if pd.notna(items.get(it, np.nan))
            ]
            rec[trait] = np.mean(vals) if vals else np.nan
        records.append(rec)

    return pd.DataFrame(records, columns=['Participant', *BFI_TRAITS])
|
||
|
||
|
||
# =============================================================================
|
||
# A. OVERALL LEARNING TRAJECTORY
|
||
# =============================================================================
|
||
|
||
def plot_A1_trajectory(df):
    """Plot mean score and mean confidence across the four phases (A1).

    Scores go on the left axis and confidence on a twin right axis; error bars
    are 1.96 x SEM. Each point is annotated with its mean, and the reading and
    tutoring phases are bracketed below the x-axis. The figure is written to
    PLOT_DIR / 'A1_trajectory.png'.
    """
    score_color, conf_color = '#1976D2', '#E65100'
    phase_stats = (
        df.groupby('Phase_Label', observed=True)
        .agg(
            S=('Score_Pct', 'mean'), S_se=('Score_Pct', 'sem'),
            C=('Avg_Confidence', 'mean'), C_se=('Avg_Confidence', 'sem'),
        )
        .reindex(PHASE_LABELS)
    )
    ticks = np.arange(4)
    shared_style = dict(markersize=10, linewidth=2.5, capsize=5, capthick=2, zorder=5)

    fig, ax1 = plt.subplots(figsize=(10, 6))
    ax1.errorbar(ticks, phase_stats['S'], yerr=phase_stats['S_se'] * 1.96,
                 color=score_color, marker='o', label='Score %', **shared_style)
    ax1.set_ylabel('Test Score (%)', color=score_color, fontsize=13)
    ax1.set_ylim(30, 100)
    ax1.tick_params(axis='y', labelcolor=score_color)

    ax2 = ax1.twinx()
    ax2.errorbar(ticks, phase_stats['C'], yerr=phase_stats['C_se'] * 1.96,
                 color=conf_color, marker='s', linestyle='--', label='Confidence',
                 **shared_style)
    ax2.set_ylabel('Avg Confidence (1-7)', color=conf_color, fontsize=13)
    ax2.set_ylim(1, 7)
    ax2.tick_params(axis='y', labelcolor=conf_color)

    ax1.set_xticks(ticks)
    ax1.set_xticklabels(PHASE_LABELS, fontsize=12)

    # Value labels: score above its marker, confidence below its marker.
    for pos, (score, conf) in enumerate(zip(phase_stats['S'], phase_stats['C'])):
        ax1.annotate(f'{score:.1f}%', (pos, score), textcoords="offset points",
                     xytext=(0, 14), ha='center', fontsize=10, color=score_color,
                     fontweight='bold')
        ax2.annotate(f'{conf:.2f}', (pos, conf), textcoords="offset points",
                     xytext=(0, -18), ha='center', fontsize=10, color=conf_color,
                     fontweight='bold')

    score_handles, score_labels = ax1.get_legend_handles_labels()
    conf_handles, conf_labels = ax2.get_legend_handles_labels()
    ax1.legend(score_handles + conf_handles, score_labels + conf_labels,
               loc='lower right', fontsize=11)

    # Double-headed brackets under the axis marking the two study phases.
    for left, right in ((0, 0.32), (0.68, 1)):
        ax1.annotate('', xy=(right, -0.12), xytext=(left, -0.12),
                     arrowprops=dict(arrowstyle='<->', color='gray', lw=1.5),
                     annotation_clip=False, xycoords='axes fraction')
    for centre, caption in ((0.16, 'Reading Phase'), (0.84, 'Tutoring Phase')):
        ax1.text(centre, -0.17, caption, transform=ax1.transAxes, ha='center',
                 fontsize=10, color='gray')

    fig.suptitle('Overall Learning Trajectory', fontsize=15, fontweight='bold')
    fig.savefig(PLOT_DIR / 'A1_trajectory.png', bbox_inches='tight')
    plt.close(fig)
|
||
|
||
|
||
def plot_A2_trajectory_by_medium(df):
    """Plot score and confidence trajectories per medium, side by side (A2).

    One line per entry of MEDIUM_ORDER, shifted slightly along x so the error
    bars (1.96 x SEM) do not overlap. Saved to
    PLOT_DIR / 'A2_trajectory_by_medium.png'.
    """
    fig, (score_ax, conf_ax) = plt.subplots(1, 2, figsize=(16, 6))
    ticks = np.arange(4)
    dodge = [-0.1, 0, 0.1]

    def _phase_summary(rows, column):
        # Mean and SEM per phase label, in PHASE_LABELS order.
        grouped = rows.groupby('Phase_Label', observed=True)[column]
        return grouped.agg(['mean', 'sem']).reindex(PHASE_LABELS)

    for slot, medium in enumerate(MEDIUM_ORDER):
        rows = df[df['Medium'] == medium]
        colour = MEDIUM_COLORS[medium]
        xs = ticks + dodge[slot]

        score = _phase_summary(rows, 'Score_Pct')
        score_ax.errorbar(xs, score['mean'], yerr=score['sem'] * 1.96, color=colour,
                          marker='o', markersize=8, linewidth=2, capsize=4, label=medium)

        conf = _phase_summary(rows, 'Avg_Confidence')
        conf_ax.errorbar(xs, conf['mean'], yerr=conf['sem'] * 1.96, color=colour,
                         marker='s', markersize=8, linewidth=2, capsize=4,
                         linestyle='--', label=medium)

    panels = (
        (score_ax, 'Test Score (%)', (30, 100), 'Score'),
        (conf_ax, 'Avg Confidence (1-7)', (1, 7), 'Confidence'),
    )
    for ax, ylabel, (low, high), title in panels:
        ax.set_xticks(ticks)
        ax.set_xticklabels(PHASE_LABELS)
        ax.set_ylabel(ylabel)
        ax.set_ylim(low, high)
        ax.legend(title='Medium')
        ax.set_title(title)

    fig.suptitle('Learning Trajectories by Medium', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'A2_trajectory_by_medium.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_A3_trajectory_by_topic(df):
    """Plot score and confidence trajectories per topic, side by side (A3).

    One line per entry of TOPIC_ORDER with 1.96 x SEM error bars; the series
    are offset along x to keep them apart. Saved to
    PLOT_DIR / 'A3_trajectory_by_topic.png'.
    """
    fig, (score_ax, conf_ax) = plt.subplots(1, 2, figsize=(16, 6))
    ticks = np.arange(4)
    dodge = [-0.1, 0, 0.1]

    # (axis, value column, marker, extra errorbar keywords)
    series_specs = (
        (score_ax, 'Score_Pct', 'o', {}),
        (conf_ax, 'Avg_Confidence', 's', {'linestyle': '--'}),
    )
    for slot, topic in enumerate(TOPIC_ORDER):
        by_phase = df[df['Topic'] == topic].groupby('Phase_Label', observed=True)
        for ax, column, marker, extra in series_specs:
            summary = by_phase[column].agg(['mean', 'sem']).reindex(PHASE_LABELS)
            ax.errorbar(ticks + dodge[slot], summary['mean'], yerr=summary['sem'] * 1.96,
                        color=TOPIC_COLORS[topic], marker=marker, markersize=8,
                        linewidth=2, capsize=4, label=topic, **extra)

    score_ax.set_xticks(ticks)
    score_ax.set_xticklabels(PHASE_LABELS)
    score_ax.set_ylabel('Test Score (%)')
    score_ax.set_ylim(30, 100)
    score_ax.legend(title='Topic')
    score_ax.set_title('Score')

    conf_ax.set_xticks(ticks)
    conf_ax.set_xticklabels(PHASE_LABELS)
    conf_ax.set_ylabel('Avg Confidence (1-7)')
    conf_ax.set_ylim(1, 7)
    conf_ax.legend(title='Topic')
    conf_ax.set_title('Confidence')

    fig.suptitle('Learning Trajectories by Topic', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'A3_trajectory_by_topic.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_A4_heatmap(df):
    """Draw participant x phase heatmaps of score and confidence (A4).

    Rows are participants sorted by the integer after the first character of
    their id; columns are the four phases. Cell values are means over all rows
    of that participant and phase. Saved to PLOT_DIR / 'A4_heatmap.png'.
    """
    def _phase_matrix(value_column):
        # Participant-by-phase table of means, rows in numeric id order.
        table = df.pivot_table(index='Participant', columns='Zeitpunkt',
                               values=value_column, aggfunc='mean')
        id_order = sorted(table.index, key=lambda pid: int(pid[1:]))
        table = table.reindex(columns=PHASE_ORDER).reindex(id_order)
        table.columns = PHASE_LABELS
        return table

    score_table = _phase_matrix('Score_Pct')
    conf_table = _phase_matrix('Avg_Confidence')

    fig, (score_ax, conf_ax) = plt.subplots(1, 2, figsize=(14, 8))

    sns.heatmap(score_table, annot=True, fmt='.0f', cmap='RdYlGn', vmin=20, vmax=100,
                ax=score_ax, linewidths=.5, cbar_kws={'label': 'Score %'})
    score_ax.set_title('Test Scores')
    score_ax.set_ylabel('Participant')

    sns.heatmap(conf_table, annot=True, fmt='.1f', cmap='YlOrRd', vmin=1, vmax=7,
                ax=conf_ax, linewidths=.5, cbar_kws={'label': 'Confidence (1-7)'})
    conf_ax.set_title('Confidence')
    conf_ax.set_ylabel('')

    fig.suptitle('Participant-Level Heatmaps', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'A4_heatmap.png')
    plt.close(fig)
|
||
|
||
|
||
# =============================================================================
|
||
# B. TUTORING PHASE DEEP-DIVE
|
||
# =============================================================================
|
||
|
||
def plot_B1_tutoring_slopes_by_medium(paired):
    """Draw pre/post tutoring slope charts, one panel per medium (B1).

    Each participant is a thin line coloured by topic; the thick diamond line
    is the medium mean. The box at the bottom of every panel reports the mean
    gain, Cohen's d, and a paired t-test with significance stars.

    Args:
        paired: frame with Participant, Topic, Medium, Pre_Score, Post_Score
            and P_Num columns (as produced by ``build_paired_tutoring``).

    The figure is written to PLOT_DIR / 'B1_tutoring_slopes_by_medium.png'.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
    for ax, medium in zip(axes, MEDIUM_ORDER):
        sub = paired[paired['Medium'] == medium].sort_values('P_Num')

        # Individual trajectories, labelled just right of the post value.
        for _, row in sub.iterrows():
            ax.plot([0, 1], [row['Pre_Score'], row['Post_Score']],
                    color=TOPIC_COLORS[row['Topic']],
                    alpha=0.5, linewidth=1.5, marker='o', markersize=5)
            ax.annotate(row['Participant'], (1.02, row['Post_Score']),
                        fontsize=7, va='center', alpha=0.6)

        pre_m, post_m = sub['Pre_Score'].mean(), sub['Post_Score'].mean()
        ax.plot([0, 1], [pre_m, post_m], color=MEDIUM_COLORS[medium], linewidth=4,
                marker='D', markersize=12, zorder=10,
                markeredgecolor='white', markeredgewidth=2)

        # Test post - pre (not pre - post) so the sign of t agrees with the
        # gain and Cohen's d shown in the same box; the p-value is unaffected.
        t, p = stats.ttest_rel(sub['Post_Score'], sub['Pre_Score'])
        d = cohens_d(sub['Pre_Score'], sub['Post_Score'])
        sig = '***' if p < .001 else '**' if p < .01 else '*' if p < .05 else 'n.s.'
        ax.text(0.5, 0.02,
                f'Gain: {post_m-pre_m:+.1f}% d={d:.2f}\nt={t:.2f}, p={p:.3f} {sig}',
                transform=ax.transAxes, ha='center', fontsize=10,
                bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))

        ax.set_xticks([0, 1])
        ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=11)
        ax.set_title(medium, fontsize=14, fontweight='bold', color=MEDIUM_COLORS[medium])
        ax.set_ylim(-5, 110)

    axes[0].set_ylabel('Test Score (%)', fontsize=12)

    # One shared legend: a proxy line per topic plus one for the medium mean.
    legend_el = [Line2D([0], [0], color=TOPIC_COLORS[t], lw=2, marker='o', ms=6, label=t)
                 for t in TOPIC_ORDER]
    legend_el.append(Line2D([0], [0], color='gray', lw=4, marker='D', ms=8,
                            label='Medium Mean'))
    fig.legend(handles=legend_el, loc='upper center', ncol=4, fontsize=10,
               bbox_to_anchor=(0.5, 0.02))

    fig.suptitle('Tutoring: Individual Trajectories by Medium', fontsize=15, fontweight='bold')
    fig.tight_layout(rect=[0, 0.05, 1, 0.96])
    fig.savefig(PLOT_DIR / 'B1_tutoring_slopes_by_medium.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_B2_tutoring_slopes_by_topic(paired):
    """Draw pre/post tutoring slope charts, one panel per topic (B2).

    Thin lines are individual participants coloured by medium; thick diamond
    lines are per-medium means within the topic, with the mean gain in the
    legend. The box at the bottom reports the topic's overall mean gain,
    Cohen's d and the paired t-test p-value. Saved to
    PLOT_DIR / 'B2_tutoring_slopes_by_topic.png'.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
    endpoints = [0, 1]

    for ax, topic in zip(axes, TOPIC_ORDER):
        rows = paired[paired['Topic'] == topic].sort_values('P_Num')

        for _, rec in rows.iterrows():
            ax.plot(endpoints, [rec['Pre_Score'], rec['Post_Score']],
                    color=MEDIUM_COLORS[rec['Medium']],
                    alpha=0.5, linewidth=1.5, marker='o', markersize=5)
            ax.annotate(rec['Participant'], (1.02, rec['Post_Score']),
                        fontsize=7, va='center', alpha=0.6)

        for medium in MEDIUM_ORDER:
            in_medium = rows[rows['Medium'] == medium]
            if len(in_medium) == 0:
                continue
            pre_mean = in_medium['Pre_Score'].mean()
            post_mean = in_medium['Post_Score'].mean()
            ax.plot(endpoints, [pre_mean, post_mean], color=MEDIUM_COLORS[medium],
                    linewidth=3.5, marker='D', markersize=10, zorder=10,
                    markeredgecolor='white', markeredgewidth=2,
                    label=f'{medium} ({post_mean-pre_mean:+.1f}%)')

        # Only the p-value of the paired test is shown in this figure.
        _, p_value = stats.ttest_rel(rows['Pre_Score'], rows['Post_Score'])
        effect = cohens_d(rows['Pre_Score'], rows['Post_Score'])
        if p_value < .001:
            stars = '***'
        elif p_value < .01:
            stars = '**'
        elif p_value < .05:
            stars = '*'
        else:
            stars = 'n.s.'
        overall_gain = rows["Score_Gain"].mean()
        ax.text(0.5, 0.02,
                f'Overall: {overall_gain:+.1f}% d={effect:.2f}\np={p_value:.3f} {stars}',
                transform=ax.transAxes, ha='center', fontsize=10,
                bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))

        ax.set_xticks(endpoints)
        ax.set_xticklabels(['Pre-Tutoring', 'Post-Tutoring'], fontsize=11)
        ax.set_title(topic, fontsize=14, fontweight='bold', color=TOPIC_COLORS[topic])
        ax.set_ylim(-5, 110)
        ax.legend(fontsize=9, loc='upper left')

    axes[0].set_ylabel('Test Score (%)', fontsize=12)
    fig.suptitle('Tutoring: Individual Trajectories by Topic', fontsize=15, fontweight='bold')
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(PLOT_DIR / 'B2_tutoring_slopes_by_topic.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_B3_tutoring_gain_by_medium(paired):
    """Bar-plus-jitter plots of tutoring score and confidence gains (B3).

    For each medium: a bar at the mean gain with a 1.96 x SEM error bar, the
    individual gains as jittered points, and a label giving the mean gain and
    Cohen's d. The same jitter offsets are used in both panels, and the jitter
    generator is seeded with 42. Saved to
    PLOT_DIR / 'B3_tutoring_gain_by_medium.png'.
    """
    fig, (score_ax, conf_ax) = plt.subplots(1, 2, figsize=(14, 6.5))
    rng = np.random.default_rng(42)

    # (axis, gain column, pre column, post column, label headroom, label template)
    panels = (
        (score_ax, 'Score_Gain', 'Pre_Score', 'Post_Score', 2, '{:+.1f}%\nd={:.2f}'),
        (conf_ax, 'Conf_Gain', 'Pre_Conf', 'Post_Conf', 0.15, '{:+.2f}\nd={:.2f}'),
    )

    for slot, medium in enumerate(MEDIUM_ORDER):
        rows = paired[paired['Medium'] == medium]
        colour = MEDIUM_COLORS[medium]
        jitter = rng.uniform(-0.15, 0.15, len(rows))
        scatter_x = np.full(len(rows), slot) + jitter

        for ax, gain_col, pre_col, post_col, headroom, template in panels:
            mean_gain, sem_gain = rows[gain_col].mean(), rows[gain_col].sem()
            effect = cohens_d(rows[pre_col], rows[post_col])
            ax.bar(slot, mean_gain, color=colour, alpha=0.6, width=0.6,
                   yerr=sem_gain * 1.96, capsize=6, edgecolor='white', lw=1.5)
            ax.scatter(scatter_x, rows[gain_col], color=colour, s=40, alpha=0.7,
                       edgecolors='white', lw=0.5, zorder=5)
            ax.text(slot, mean_gain + sem_gain * 1.96 + headroom,
                    template.format(mean_gain, effect),
                    ha='center', fontsize=10, fontweight='bold')

    for ax, ylabel, title in ((score_ax, 'Score Gain (%)', 'Score Gain'),
                              (conf_ax, 'Confidence Gain', 'Confidence Gain')):
        ax.axhline(0, color='gray', lw=1)
        ax.set_xticks(range(3))
        ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
        ax.set_ylabel(ylabel)
        ax.set_title(title)

    fig.suptitle('Tutoring Gains by Medium (with effect sizes)', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'B3_tutoring_gain_by_medium.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_B4_tutoring_medium_topic(paired):
    """Show the medium x topic interaction in tutoring score gain (B4).

    Left: heatmap of the mean gain per (medium, topic) cell. Right: grouped
    bars of the same means with 1.96 x SEM error bars, grouped by medium and
    coloured by topic. Saved to PLOT_DIR / 'B4_tutoring_medium_topic.png'.
    """
    fig, (heat_ax, bar_ax) = plt.subplots(1, 2, figsize=(15, 6))

    cell_means = paired.pivot_table(index='Medium', columns='Topic',
                                    values='Score_Gain', aggfunc='mean')
    cell_means = cell_means.reindex(index=MEDIUM_ORDER, columns=TOPIC_ORDER)
    sns.heatmap(cell_means, annot=True, fmt='.1f', cmap='RdYlGn', center=0, ax=heat_ax,
                linewidths=1, vmin=-10, vmax=30, cbar_kws={'label': 'Score Gain %'})
    heat_ax.set_title('Mean Tutoring Score Gain')
    heat_ax.set_ylabel('Medium')

    def _cell_gains(medium, topic):
        # Score gains of the rows belonging to one medium/topic combination.
        in_cell = (paired['Medium'] == medium) & (paired['Topic'] == topic)
        return paired[in_cell]['Score_Gain']

    group_x = np.arange(3)
    bar_w = 0.25
    for slot, topic in enumerate(TOPIC_ORDER):
        heights = [_cell_gains(medium, topic).mean() for medium in MEDIUM_ORDER]
        errors = [_cell_gains(medium, topic).sem() * 1.96 for medium in MEDIUM_ORDER]
        bar_ax.bar(group_x + slot * bar_w - bar_w, heights, bar_w, yerr=errors, capsize=3,
                   color=TOPIC_COLORS[topic], alpha=0.8, label=topic, edgecolor='white')

    bar_ax.axhline(0, color='gray', lw=0.8)
    bar_ax.set_xticks(group_x)
    bar_ax.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    bar_ax.set_ylabel('Score Gain (%)')
    bar_ax.legend(title='Topic', fontsize=9)
    bar_ax.set_title('Gain by Medium and Topic')

    fig.suptitle('Medium x Topic Interaction', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'B4_tutoring_medium_topic.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_B5_tutoring_dashboard(paired):
    """Render the six-panel tutoring effectiveness dashboard (B5).

    Panels, all broken down by medium:
      A) mean pre/post scores          B) mean pre/post confidence
      C) mean gain with d and stars    D) box plots of score gains
      E) share improved/same/declined  F) summary statistics table
    Error bars are 1.96 x SEM. Saved to PLOT_DIR / 'B5_tutoring_dashboard.png'.
    """
    fig = plt.figure(figsize=(18, 10))
    grid = fig.add_gridspec(2, 3, hspace=0.35, wspace=0.3)
    group_x = np.arange(3)
    bar_w = 0.35

    rows_by_medium = {m: paired[paired['Medium'] == m] for m in MEDIUM_ORDER}
    medium_fill = [MEDIUM_COLORS[m] for m in MEDIUM_ORDER]

    def _per_medium(column):
        # Per-medium means and 1.96 x SEM half-widths for one column.
        means = [rows_by_medium[m][column].mean() for m in MEDIUM_ORDER]
        half_widths = [rows_by_medium[m][column].sem() * 1.96 for m in MEDIUM_ORDER]
        return means, half_widths

    def _stars(p_value, fallback):
        # Conventional significance stars; `fallback` when p >= .05 (or NaN).
        if p_value < .001:
            return '***'
        if p_value < .01:
            return '**'
        if p_value < .05:
            return '*'
        return fallback

    # A) Absolute scores  /  B) Absolute confidence
    absolute_panels = (
        (0, 'Pre_Score', 'Post_Score', '#BBDEFB', '#1976D2',
         'Score (%)', (40, 100), 'A) Absolute Scores'),
        (1, 'Pre_Conf', 'Post_Conf', '#FFE0B2', '#E65100',
         'Confidence (1-7)', (1, 7), 'B) Absolute Confidence'),
    )
    for col, pre_col, post_col, pre_face, pre_edge, ylabel, (low, high), title in absolute_panels:
        ax = fig.add_subplot(grid[0, col])
        pre_means, pre_err = _per_medium(pre_col)
        post_means, post_err = _per_medium(post_col)
        ax.bar(group_x - bar_w / 2, pre_means, bar_w, yerr=pre_err, capsize=4,
               color=pre_face, edgecolor=pre_edge, lw=1.5, label='Pre')
        ax.bar(group_x + bar_w / 2, post_means, bar_w, yerr=post_err, capsize=4,
               color=medium_fill, alpha=0.8, edgecolor='white', lw=1.5, label='Post')
        ax.set_xticks(group_x)
        ax.set_xticklabels(MEDIUM_ORDER)
        ax.set_ylabel(ylabel)
        ax.set_ylim(low, high)
        ax.legend(fontsize=9)
        ax.set_title(title, fontweight='bold')

    # C) Gains + effect sizes
    ax = fig.add_subplot(grid[0, 2])
    for slot, medium in enumerate(MEDIUM_ORDER):
        rows = rows_by_medium[medium]
        mean_gain, sem_gain = rows['Score_Gain'].mean(), rows['Score_Gain'].sem()
        _, p_value = stats.ttest_rel(rows['Pre_Score'], rows['Post_Score'])
        effect = cohens_d(rows['Pre_Score'], rows['Post_Score'])
        ax.bar(slot, mean_gain, color=MEDIUM_COLORS[medium], alpha=0.7,
               yerr=sem_gain * 1.96, capsize=5, width=0.6)
        stars = _stars(p_value, 'n.s.')
        ax.text(slot, mean_gain + sem_gain * 1.96 + 1.5,
                f'{mean_gain:+.1f}%\nd={effect:.2f} {stars}',
                ha='center', fontsize=10, fontweight='bold')
    ax.axhline(0, color='gray', lw=1)
    ax.set_xticks(range(3))
    ax.set_xticklabels(MEDIUM_ORDER)
    ax.set_ylabel('Score Gain (%)')
    ax.set_title('C) Gains + Effect Sizes', fontweight='bold')

    # D) Gain distributions
    ax = fig.add_subplot(grid[1, 0])
    mean_marker = dict(marker='D', markerfacecolor='black', markersize=6)
    for slot, medium in enumerate(MEDIUM_ORDER):
        drawn = ax.boxplot(rows_by_medium[medium]['Score_Gain'], positions=[slot],
                           widths=0.5, patch_artist=True, showmeans=True,
                           meanprops=mean_marker)
        box = drawn['boxes'][0]
        box.set_facecolor(MEDIUM_COLORS[medium])
        box.set_alpha(0.5)
    ax.axhline(0, color='gray', lw=0.8, ls='--')
    ax.set_xticks(range(3))
    ax.set_xticklabels(MEDIUM_ORDER)
    ax.set_ylabel('Score Gain (%)')
    ax.set_title('D) Gain Distributions', fontweight='bold')

    # E) Improved/same/declined
    ax = fig.add_subplot(grid[1, 1])
    outcome_colors = ['#43A047', '#9E9E9E', '#E53935']
    for slot, medium in enumerate(MEDIUM_ORDER):
        gains = rows_by_medium[medium]['Score_Gain']
        improved = (gains > 0).sum()
        unchanged = (gains == 0).sum()
        declined = (gains < 0).sum()
        total = len(rows_by_medium[medium])
        ax.barh([slot - 0.15, slot, slot + 0.15],
                [improved / total * 100, unchanged / total * 100, declined / total * 100],
                height=0.12, color=outcome_colors, alpha=0.8)
        ax.text(improved / total * 100 + 1, slot - 0.15, f'{improved}/{total}',
                va='center', fontsize=9)
    ax.set_yticks(range(3))
    ax.set_yticklabels(MEDIUM_ORDER)
    ax.set_xlabel('% of Participants')
    ax.legend([mpatches.Patch(color=shade) for shade in outcome_colors],
              ['Improved', 'Same', 'Declined'], fontsize=8, loc='lower right')
    ax.set_title('E) Improved / Same / Declined', fontweight='bold')

    # F) Stats table
    ax = fig.add_subplot(grid[1, 2])
    ax.axis('off')
    table_rows = []
    for medium in MEDIUM_ORDER:
        rows = rows_by_medium[medium]
        mean_gain = rows['Score_Gain'].mean()
        _, p_value = stats.ttest_rel(rows['Pre_Score'], rows['Post_Score'])
        effect = cohens_d(rows['Pre_Score'], rows['Post_Score'])
        stars = _stars(p_value, '')  # the table leaves non-significant p bare
        table_rows.append([
            medium,
            str(len(rows)),
            f'{rows["Pre_Score"].mean():.1f}',
            f'{rows["Post_Score"].mean():.1f}',
            f'{mean_gain:+.1f}',
            f'{effect:.2f}',
            f'{p_value:.3f}{stars}',
        ])
    table = ax.table(cellText=table_rows,
                     colLabels=['Medium', 'N', 'Pre M', 'Post M', 'Gain', "Cohen's d", 'p-value'],
                     loc='center', cellLoc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1.0, 1.8)
    for col in range(7):
        header_cell = table[0, col]
        header_cell.set_facecolor('#E0E0E0')
        header_cell.set_text_props(fontweight='bold')
    for row_no, medium in enumerate(MEDIUM_ORDER, start=1):
        name_cell = table[row_no, 0]
        name_cell.set_facecolor(MEDIUM_COLORS[medium])
        name_cell.set_text_props(color='white', fontweight='bold')
    ax.set_title('F) Statistical Summary', fontweight='bold', pad=20)

    fig.suptitle('Tutoring Effectiveness Dashboard', fontsize=16, fontweight='bold')
    fig.savefig(PLOT_DIR / 'B5_tutoring_dashboard.png')
    plt.close(fig)
|
||
|
||
|
||
# =============================================================================
|
||
# C. START-TO-FINISH GAINS
|
||
# =============================================================================
|
||
|
||
def plot_C1_start_to_finish(df):
    """Compare Pre-Reading with Post-Tutoring scores (C1).

    Rows are paired on (Participant, Topic, Medium). Left: faint individual
    start-to-end lines with a bold per-medium mean line. Right: overlaid
    histograms of the total gain per medium. Saved to
    PLOT_DIR / 'C1_start_to_finish.png'.
    """
    keys = ['Participant', 'Topic', 'Medium']
    first = df[df['Zeitpunkt'] == 'Pre-Reading'][keys + ['Score_Pct']].copy()
    last = df[df['Zeitpunkt'] == 'Post-Tutoring'][keys + ['Score_Pct']].copy()
    first.columns = keys + ['Start']
    last.columns = keys + ['End']
    span = first.merge(last, on=keys)
    span['Gain'] = span['End'] - span['Start']

    fig, (line_ax, hist_ax) = plt.subplots(1, 2, figsize=(14, 7))

    for _, rec in span.iterrows():
        line_ax.plot([0, 1], [rec['Start'], rec['End']],
                     color=MEDIUM_COLORS[rec['Medium']], alpha=0.25, lw=1)
    for medium in MEDIUM_ORDER:
        rows = span[span['Medium'] == medium]
        start_mean, end_mean = rows['Start'].mean(), rows['End'].mean()
        line_ax.plot([0, 1], [start_mean, end_mean], color=MEDIUM_COLORS[medium],
                     lw=3.5, marker='o', ms=10,
                     label=f'{medium} ({end_mean-start_mean:+.1f}%)', zorder=10)
    line_ax.set_xticks([0, 1])
    line_ax.set_xticklabels(['Pre-Reading\n(Start)', 'Post-Tutoring\n(End)'], fontsize=12)
    line_ax.set_ylabel('Test Score (%)')
    line_ax.set_ylim(0, 105)
    line_ax.legend(title='Medium (total gain)', loc='lower right')
    line_ax.set_title('Score Trajectory')

    for medium in MEDIUM_ORDER:
        gains = span[span['Medium'] == medium]['Gain']
        hist_ax.hist(gains, bins=10, alpha=0.5, color=MEDIUM_COLORS[medium],
                     label=f'{medium} (M={gains.mean():.1f}%)', edgecolor='white')
    hist_ax.axvline(0, color='gray', lw=1, ls='--')
    hist_ax.set_xlabel('Total Gain (%)')
    hist_ax.set_ylabel('Count')
    hist_ax.legend(title='Medium')
    hist_ax.set_title('Gain Distribution')

    fig.suptitle('Start to Finish: Pre-Reading to Post-Tutoring', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'C1_start_to_finish.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_C2_learning_gains(df):
    """Summarise reading, tutoring and total score gains per medium (C2).

    Gains are phase differences computed per (Participant, Topic, Medium).
    Panels: reading-vs-tutoring scatter, mean reading/tutoring gains per
    medium, and mean total gain per medium (error bars 1.96 x SEM). Saved to
    PLOT_DIR / 'C2_learning_gains.png'.
    """
    wide = df.pivot_table(index=['Participant', 'Topic', 'Medium'],
                          columns='Zeitpunkt', values='Score_Pct').reset_index()

    def _phase(name):
        # Column for one phase; 0 stands in when the phase is absent entirely.
        return wide.get(name, 0)

    gains = pd.DataFrame({
        'Medium': wide['Medium'],
        'Reading': _phase('Post-Reading') - _phase('Pre-Reading'),
        'Tutoring': _phase('Post-Tutoring') - _phase('Pre-Tutoring'),
        'Total': _phase('Post-Tutoring') - _phase('Pre-Reading'),
    })

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    scatter_ax, phase_ax, total_ax = axes[0], axes[1], axes[2]

    # Scatter
    for medium in MEDIUM_ORDER:
        rows = gains[gains['Medium'] == medium]
        scatter_ax.scatter(rows['Reading'], rows['Tutoring'], color=MEDIUM_COLORS[medium],
                           s=60, alpha=0.7, edgecolors='white', lw=0.5, label=medium)
    scatter_ax.axhline(0, color='gray', lw=0.8, alpha=0.5)
    scatter_ax.axvline(0, color='gray', lw=0.8, alpha=0.5)
    scatter_ax.set_xlabel('Reading Gain (%)')
    scatter_ax.set_ylabel('Tutoring Gain (%)')
    scatter_ax.legend(title='Medium')
    scatter_ax.set_title('Reading vs Tutoring')

    # Bar
    phase_stats = gains.groupby('Medium')[['Reading', 'Tutoring']].agg(['mean', 'sem'])
    group_x = np.arange(3)
    bar_w = 0.35
    phase_specs = [('Reading', '#1976D2', 'Reading'), ('Tutoring', '#E65100', 'Tutoring')]
    for slot, (gain_name, colour, legend_label) in enumerate(phase_specs):
        heights = [phase_stats.loc[m, (gain_name, 'mean')] for m in MEDIUM_ORDER]
        errors = [phase_stats.loc[m, (gain_name, 'sem')] * 1.96 for m in MEDIUM_ORDER]
        drawn = phase_ax.bar(group_x + slot * bar_w - bar_w / 2, heights, bar_w, yerr=errors,
                             color=colour, alpha=0.8, capsize=4, label=legend_label)
        for rect, value in zip(drawn, heights):
            phase_ax.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 1,
                          f'{value:.1f}', ha='center', fontsize=9)
    phase_ax.set_xticks(group_x)
    phase_ax.set_xticklabels(MEDIUM_ORDER)
    phase_ax.set_ylabel('Score Gain (%)')
    phase_ax.axhline(0, color='gray', lw=0.8)
    phase_ax.legend()
    phase_ax.set_title('Mean Gains by Medium')

    # Total
    total_stats = gains.groupby('Medium')['Total'].agg(['mean', 'sem'])
    total_means = [total_stats.loc[m, 'mean'] for m in MEDIUM_ORDER]
    total_errors = [total_stats.loc[m, 'sem'] * 1.96 for m in MEDIUM_ORDER]
    drawn = total_ax.bar(MEDIUM_ORDER, total_means,
                         color=[MEDIUM_COLORS[m] for m in MEDIUM_ORDER],
                         alpha=0.8, yerr=total_errors, capsize=5)
    for rect, medium in zip(drawn, MEDIUM_ORDER):
        total_ax.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 1,
                      f'{total_stats.loc[medium,"mean"]:.1f}%',
                      ha='center', fontsize=10, fontweight='bold')
    total_ax.set_ylabel('Total Gain (%)')
    total_ax.axhline(0, color='gray', lw=0.8)
    total_ax.set_title('Total Learning Gain')

    fig.suptitle('Learning Gains Overview', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'C2_learning_gains.png')
    plt.close(fig)
|
||
|
||
|
||
# =============================================================================
|
||
# D. CONFIDENCE ANALYSIS
|
||
# =============================================================================
|
||
|
||
def plot_D1_confidence_vs_score(df):
    """Scatter average confidence against test score in three views (D1).

    Panels: all observations with a least-squares line and Pearson r (drawn
    only when more than two complete pairs exist), the same points split by
    phase, and split by medium. Saved to
    PLOT_DIR / 'D1_confidence_vs_score.png'.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)
    overall_ax, phase_ax, medium_ax = axes[0], axes[1], axes[2]

    # Overall cloud with trend line.
    overall_ax.scatter(df['Score_Pct'], df['Avg_Confidence'], alpha=0.4, s=40,
                       c='#546E7A', edgecolors='white', lw=0.5)
    complete_rows = df[['Score_Pct', 'Avg_Confidence']].dropna().index
    score = df.loc[complete_rows, 'Score_Pct']
    conf = df.loc[complete_rows, 'Avg_Confidence']
    if len(score) > 2:
        coeffs = np.polyfit(score, conf, 1)
        grid = np.linspace(score.min(), score.max(), 100)
        overall_ax.plot(grid, np.poly1d(coeffs)(grid), 'r-', lw=2, alpha=0.8)
        pearson_r = np.corrcoef(score, conf)[0, 1]
        overall_ax.text(0.05, 0.95, f'r = {pearson_r:.3f}', transform=overall_ax.transAxes,
                        fontsize=12, va='top', fontweight='bold',
                        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    overall_ax.set_xlabel('Test Score (%)')
    overall_ax.set_ylabel('Avg Confidence (1-7)')
    overall_ax.set_title('Overall')

    # By phase.
    # NOTE(review): this palette is paired with the phases but never handed to
    # scatter(), so the points use matplotlib's default colour cycle. Left
    # as-is to keep the rendered figure unchanged - confirm the intent.
    phase_palette = ['#E8EAF6', '#C5CAE9', '#7986CB', '#3F51B5']
    for phase, _unused_colour in zip(PHASE_ORDER, phase_palette):
        rows = df[df['Zeitpunkt'] == phase]
        phase_ax.scatter(rows['Score_Pct'], rows['Avg_Confidence'], alpha=0.5, s=40,
                         label=PHASE_SHORT[phase], edgecolors='white', lw=0.5)
    phase_ax.legend(fontsize=9, title='Phase')
    phase_ax.set_xlabel('Test Score (%)')
    phase_ax.set_title('By Phase')

    # By medium.
    for medium in MEDIUM_ORDER:
        rows = df[df['Medium'] == medium]
        medium_ax.scatter(rows['Score_Pct'], rows['Avg_Confidence'], alpha=0.5, s=40,
                          color=MEDIUM_COLORS[medium], label=medium,
                          edgecolors='white', lw=0.5)
    medium_ax.legend(fontsize=9, title='Medium')
    medium_ax.set_xlabel('Test Score (%)')
    medium_ax.set_title('By Medium')

    fig.suptitle('Confidence vs Test Score', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'D1_confidence_vs_score.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_D2_delta_conf_vs_score(df):
    """Plot change in confidence against change in score (D2).

    Changes are computed per (Participant, Topic, Medium) for the reading
    phase, the tutoring phase, and start-to-finish; rows with any missing
    change are dropped. Each panel shows points coloured by medium plus a
    least-squares line and Pearson r when more than two points exist. Saved
    to PLOT_DIR / 'D2_delta_conf_vs_score.png'.
    """
    keys = ['Participant', 'Topic', 'Medium']
    score_wide = df.pivot_table(index=keys, columns='Zeitpunkt', values='Score_Pct')
    conf_wide = df.pivot_table(index=keys, columns='Zeitpunkt', values='Avg_Confidence')

    def _change(wide, later, earlier):
        # Difference between two phase columns; 0 stands in for an absent phase.
        return wide.get(later, 0) - wide.get(earlier, 0)

    deltas = pd.DataFrame({
        'R_S': _change(score_wide, 'Post-Reading', 'Pre-Reading'),
        'R_C': _change(conf_wide, 'Post-Reading', 'Pre-Reading'),
        'T_S': _change(score_wide, 'Post-Tutoring', 'Pre-Tutoring'),
        'T_C': _change(conf_wide, 'Post-Tutoring', 'Pre-Tutoring'),
        'A_S': _change(score_wide, 'Post-Tutoring', 'Pre-Reading'),
        'A_C': _change(conf_wide, 'Post-Tutoring', 'Pre-Reading'),
    }).reset_index().dropna()

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    panels = [('R_S', 'R_C', 'Reading Phase'),
              ('T_S', 'T_C', 'Tutoring Phase'),
              ('A_S', 'A_C', 'Total')]
    for ax, (score_col, conf_col, title) in zip(axes, panels):
        for medium in MEDIUM_ORDER:
            rows = deltas[deltas['Medium'] == medium]
            ax.scatter(rows[score_col], rows[conf_col], color=MEDIUM_COLORS[medium],
                       s=50, alpha=0.6, edgecolors='white', label=medium)

        score_delta = deltas[score_col].values
        conf_delta = deltas[conf_col].values
        if len(score_delta) > 2:
            coeffs = np.polyfit(score_delta, conf_delta, 1)
            grid = np.linspace(score_delta.min(), score_delta.max(), 100)
            ax.plot(grid, np.poly1d(coeffs)(grid), 'r-', lw=1.5, alpha=0.7)
            pearson_r = np.corrcoef(score_delta, conf_delta)[0, 1]
            ax.text(0.05, 0.95, f'r = {pearson_r:.3f}', transform=ax.transAxes,
                    fontsize=11, va='top', fontweight='bold',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        ax.axhline(0, color='gray', lw=0.8, alpha=0.5)
        ax.axvline(0, color='gray', lw=0.8, alpha=0.5)
        ax.set_xlabel('Score Change (%)')
        ax.set_ylabel('Confidence Change')
        ax.set_title(title)
        ax.legend(title='Medium', fontsize=8)

    fig.suptitle('Do Changes in Confidence Track Changes in Score?', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'D2_delta_conf_vs_score.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_D3_calibration(df):
    """Plot how well self-rated confidence predicts test score (D3).

    Left panel: mean score (with 1.96 * SEM error bars) per confidence
    rating bin, keeping only bins with at least three observations.
    Right panel: mean score per coarse confidence level, one line per phase;
    phases with fewer than five observations are skipped.
    The figure is saved as ``D3_calibration.png`` in ``PLOT_DIR``.

    Args:
        df: Long-format test data with the columns ``Score_Pct``,
            ``Avg_Confidence`` and ``Zeitpunkt``.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    dc = df[['Score_Pct', 'Avg_Confidence', 'Zeitpunkt']].dropna().copy()

    # --- Overall calibration: one bar per confidence bin ---
    dc['Bin'] = pd.cut(dc['Avg_Confidence'], bins=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5],
                       labels=list('1234567'))
    cal = dc.groupby('Bin', observed=True)['Score_Pct'].agg(['mean', 'sem', 'count'])
    cal = cal[cal['count'] >= 3]
    ax1.bar(cal.index.astype(str), cal['mean'], yerr=cal['sem'] * 1.96, capsize=4,
            color='#5C6BC0', alpha=0.8, edgecolor='white')
    for idx, row in cal.iterrows():
        ax1.text(str(idx), row['mean'] + 2, f'n={int(row["count"])}', ha='center',
                 fontsize=8, color='gray')
    ax1.set_xlabel('Confidence Rating')
    ax1.set_ylabel('Mean Test Score (%)')
    ax1.set_title('Overall Calibration')

    # --- Calibration by phase: one line per phase over three levels ---
    pcol = {'Pre-Reading': '#E8EAF6', 'Post-Reading': '#9FA8DA',
            'Pre-Tutoring': '#5C6BC0', 'Post-Tutoring': '#283593'}
    level_labels = ['Low (1-2)', 'Med (3-4)', 'High (5-7)']
    # Plot on fixed numeric positions: a string-valued (categorical) axis is
    # ordered by first appearance, so a phase that lacks one level could
    # otherwise scramble the Low/Med/High order.
    level_pos = {label: pos for pos, label in enumerate(level_labels)}
    for phase in PHASE_ORDER:
        sub = dc[dc['Zeitpunkt'] == phase]
        if len(sub) < 5:
            continue
        levels = pd.cut(sub['Avg_Confidence'], bins=[0.5, 2.5, 4.5, 7.5], labels=level_labels)
        ms = sub.groupby(levels, observed=True)['Score_Pct'].mean()
        ax2.plot([level_pos[str(label)] for label in ms.index], ms.values, marker='o',
                 lw=2, ms=8, color=pcol[phase], label=PHASE_SHORT[phase])
    ax2.set_xticks(list(range(len(level_labels))))
    ax2.set_xticklabels(level_labels)
    ax2.set_xlabel('Confidence Level')
    ax2.set_ylabel('Mean Test Score (%)')
    ax2.legend(title='Phase')
    ax2.set_title('Calibration by Phase')

    fig.suptitle('Confidence Calibration', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'D3_calibration.png')
    plt.close(fig)
|
||
|
||
|
||
# =============================================================================
|
||
# E. PERSONALITY CORRELATIONS
|
||
# =============================================================================
|
||
|
||
def plot_E1_personality_correlations(merged):
    """Draw a heatmap of Pearson correlations between traits and outcomes (E1).

    Correlates every trait in ``TRAIT_ORDER`` with seven per-participant
    outcome columns, annotates each cell with r and marks cells with
    p < .05 (``*``) or p < .01 (``**``). The figure is saved as
    ``E1_personality_correlations.png`` in ``PLOT_DIR``.

    Args:
        merged: One row per participant, holding one numeric column per
            trait in ``TRAIT_ORDER`` plus the outcome columns listed in
            ``outcomes`` below.
    """
    outcomes = ['Mean_Score_Gain', 'Mean_Conf_Gain', 'Mean_Total_Gain', 'Mean_Pre_Score',
                'Mean_Post_Score', 'Mean_Pre_Conf', 'Mean_Post_Conf']
    labels = ['Tutor\nScore Gain', 'Tutor\nConf Gain', 'Total\nGain', 'Pre-Tutor\nScore',
              'Post-Tutor\nScore', 'Pre-Tutor\nConf', 'Post-Tutor\nConf']

    # Size the matrices from the lists themselves so that adding a trait or
    # an outcome cannot fall out of sync with hard-coded dimensions.
    # Defaults (r = NaN, p = 1) stand for "too few complete pairs".
    corr = np.full((len(TRAIT_ORDER), len(outcomes)), np.nan)
    pvals = np.ones_like(corr)
    for i, t in enumerate(TRAIT_ORDER):
        for j, o in enumerate(outcomes):
            xv, yv = merged[t].values, merged[o].values
            mask = ~(np.isnan(xv) | np.isnan(yv))
            # Require more than three complete pairs before correlating.
            if mask.sum() > 3:
                corr[i, j], pvals[i, j] = stats.pearsonr(xv[mask], yv[mask])

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.heatmap(pd.DataFrame(corr, index=TRAIT_ORDER, columns=labels), annot=True, fmt='.2f',
                cmap='RdBu_r', center=0, vmin=-0.7, vmax=0.7, ax=ax, linewidths=1,
                cbar_kws={'label': 'Pearson r'})
    for (i, j), p in np.ndenumerate(pvals):
        star = '**' if p < .01 else '*' if p < .05 else ''
        if star:
            # Place the marker below the r annotation in the cell.
            ax.text(j + 0.5, i + 0.75, star, ha='center', va='center', fontsize=12,
                    fontweight='bold', color='black')
    ax.set_title('Big Five Traits vs Tutoring Outcomes (* p<.05, ** p<.01)', fontsize=13,
                 fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'E1_personality_correlations.png')
    plt.close(fig)
|
||
|
||
|
||
def plot_E2_trait_vs_score_gain(merged):
    """Scatter each trait against the mean tutoring score gain (E2).

    One panel per trait in ``TRAIT_ORDER`` with a shared y-axis. Every
    participant with both values present is plotted and labelled; when more
    than three such participants exist, a linear fit and the Pearson r / p
    are added. The figure is saved as ``E2_trait_vs_score_gain.png`` in
    ``PLOT_DIR``.

    Args:
        merged: One row per participant with the columns ``Participant``,
            ``Mean_Score_Gain`` and one column per trait.
    """
    fig, axes = plt.subplots(1, 5, figsize=(22, 5), sharey=True)
    gain_all = merged['Mean_Score_Gain'].values
    names_all = merged['Participant'].to_numpy()

    for ax, trait in zip(axes, TRAIT_ORDER):
        colour = TRAIT_COLORS[trait]
        trait_all = merged[trait].values
        complete = ~(np.isnan(trait_all) | np.isnan(gain_all))
        tx, gy = trait_all[complete], gain_all[complete]

        ax.scatter(tx, gy, s=60, color=colour, alpha=0.7, edgecolors='white', lw=0.5)
        # Label every plotted point with its participant id.
        for name, px, py in zip(names_all[complete], tx, gy):
            ax.annotate(name, (px, py), fontsize=7, alpha=0.5,
                        textcoords="offset points", xytext=(3, 3))

        if complete.sum() > 3:
            r, p = stats.pearsonr(tx, gy)
            fit = np.poly1d(np.polyfit(tx, gy, 1))
            grid = np.linspace(tx.min(), tx.max(), 100)
            ax.plot(grid, fit(grid), color=colour, lw=2, alpha=0.6)
            sig = '*' if p < .05 else ''
            ax.text(0.05, 0.95, f'r={r:.2f} p={p:.3f}{sig}', transform=ax.transAxes,
                    fontsize=10, va='top',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        ax.axhline(0, color='gray', lw=0.5, alpha=0.5)
        ax.set_xlabel(trait, fontsize=11, fontweight='bold', color=colour)
        ax.set_xlim(1, 7)

    axes[0].set_ylabel('Mean Tutoring Score Gain (%)', fontsize=11)
    fig.suptitle('Big Five Traits vs Tutoring Score Gains', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'E2_trait_vs_score_gain.png')
    plt.close(fig)
|
||
|
||
|
||
# =============================================================================
|
||
# STATS EXPORT
|
||
# =============================================================================
|
||
|
||
def export_stats(df, paired):
    """Write the summary-statistics CSV files into ``STATS_DIR``.

    Files written (all floats formatted with three decimals):
        overall_trajectory.csv        mean / SEM of score and confidence per phase
        start_to_finish.csv           Pre-Reading to Post-Tutoring gain, one-sample t-test
        tutoring_by_medium.csv        paired pre/post tutoring statistics per medium
        tutoring_by_topic.csv         paired pre/post tutoring statistics per topic
        participant_summary.csv       per-participant phase means and gains
        tutoring_by_medium_topic.csv  tutoring score gain per medium x topic cell

    Args:
        df: Long-format test data with the columns ``Participant``,
            ``Topic``, ``Medium``, ``Zeitpunkt``, ``Score_Pct`` and
            ``Avg_Confidence``.
        paired: One row per tutoring pre/post pair with the columns
            ``Participant``, ``Topic``, ``Medium``, ``Pre_Score``,
            ``Post_Score``, ``Score_Gain``, ``Pre_Conf``, ``Post_Conf`` and
            ``Conf_Gain``.
    """
    # --- Overall trajectory ---
    phase_means = df.groupby('Zeitpunkt', observed=True).agg(
        Mean_Score=('Score_Pct', 'mean'),
        SEM_Score=('Score_Pct', 'sem'),
        Mean_Confidence=('Avg_Confidence', 'mean'),
        SEM_Confidence=('Avg_Confidence', 'sem'),
    ).reindex(PHASE_ORDER)
    phase_means.index.name = 'Phase'
    phase_means.to_csv(STATS_DIR / 'overall_trajectory.csv', float_format='%.3f')

    # --- Start-to-finish gain ---
    id_cols = ['Participant', 'Topic', 'Medium']
    pre_r = df[df['Zeitpunkt'] == 'Pre-Reading'][id_cols + ['Score_Pct']].copy()
    pre_r.columns = id_cols + ['Start']
    post_t = df[df['Zeitpunkt'] == 'Post-Tutoring'][id_cols + ['Score_Pct']].copy()
    post_t.columns = id_cols + ['End']
    sf = pre_r.merge(post_t, on=id_cols)
    sf['Gain'] = sf['End'] - sf['Start']
    # The t-test, mean and SD all ignore NaN gains, so report the number of
    # gains actually tested rather than the number of merged rows.
    gains = sf['Gain'].dropna()
    t_sf, p_sf = stats.ttest_1samp(gains, 0)
    sf_summary = pd.DataFrame([{
        'Metric': 'Pre-Reading to Post-Tutoring',
        'N': len(gains),
        'Gain_Mean': sf['Gain'].mean(),
        'Gain_SD': sf['Gain'].std(),
        't_stat': t_sf,
        'p_value': p_sf,
    }])
    sf_summary.to_csv(STATS_DIR / 'start_to_finish.csv', index=False, float_format='%.3f')

    # --- Tutoring stats by medium ---
    # NOTE: the paired tests are called as (pre, post); the sign of t follows
    # that argument order.
    med_rows = []
    for m in MEDIUM_ORDER:
        sub = paired[paired['Medium'] == m]
        t_val, p_val = stats.ttest_rel(sub['Pre_Score'], sub['Post_Score'])
        d_score = cohens_d(sub['Pre_Score'], sub['Post_Score'])
        t_c, p_c = stats.ttest_rel(sub['Pre_Conf'], sub['Post_Conf'])
        d_conf = cohens_d(sub['Pre_Conf'], sub['Post_Conf'])
        all_m = df[df['Medium'] == m]  # every test with this medium, all phases
        med_rows.append({
            'Medium': m,
            'N_pairs': len(sub),
            'Avg_Score_Mean': all_m['Score_Pct'].mean(),
            'Avg_Score_SD': all_m['Score_Pct'].std(),
            'Avg_Conf_Mean': all_m['Avg_Confidence'].mean(),
            'Avg_Conf_SD': all_m['Avg_Confidence'].std(),
            'Pre_Score_Mean': sub['Pre_Score'].mean(),
            'Post_Score_Mean': sub['Post_Score'].mean(),
            'Score_Gain_Mean': sub['Score_Gain'].mean(),
            'Score_Gain_SD': sub['Score_Gain'].std(),
            'Score_Cohens_d': d_score,
            'Score_t': t_val,
            'Score_p': p_val,
            'Pre_Conf_Mean': sub['Pre_Conf'].mean(),
            'Post_Conf_Mean': sub['Post_Conf'].mean(),
            'Conf_Gain_Mean': sub['Conf_Gain'].mean(),
            'Conf_Gain_SD': sub['Conf_Gain'].std(),
            'Conf_Cohens_d': d_conf,
            'Conf_t': t_c,
            'Conf_p': p_c,
        })
    pd.DataFrame(med_rows).to_csv(STATS_DIR / 'tutoring_by_medium.csv', index=False,
                                  float_format='%.3f')

    # --- Tutoring stats by topic ---
    topic_rows = []
    for topic in df['Topic'].unique():
        sub_t = paired[paired['Topic'] == topic]
        all_t = df[df['Topic'] == topic]
        t_val, p_val = stats.ttest_rel(sub_t['Pre_Score'], sub_t['Post_Score'])
        d_score = cohens_d(sub_t['Pre_Score'], sub_t['Post_Score'])
        topic_rows.append({
            'Topic': topic,
            'N_pairs': len(sub_t),
            'Avg_Score_Mean': all_t['Score_Pct'].mean(),
            'Avg_Score_SD': all_t['Score_Pct'].std(),
            'Avg_Conf_Mean': all_t['Avg_Confidence'].mean(),
            'Avg_Conf_SD': all_t['Avg_Confidence'].std(),
            'Score_Gain_Mean': sub_t['Score_Gain'].mean(),
            'Score_Gain_SD': sub_t['Score_Gain'].std(),
            'Score_Cohens_d': d_score,
            'Score_t': t_val,
            'Score_p': p_val,
        })
    pd.DataFrame(topic_rows).to_csv(STATS_DIR / 'tutoring_by_topic.csv', index=False,
                                    float_format='%.3f')

    # --- Participant summary ---
    part_rows = []
    # Sort by the number that follows the first character of the id.
    for pid in sorted(df['Participant'].unique(), key=lambda x: int(x[1:])):
        sub_df = df[df['Participant'] == pid]
        sub_p = paired[paired['Participant'] == pid]
        phases = sub_df.groupby('Zeitpunkt', observed=True)['Score_Pct'].mean().reindex(PHASE_ORDER)
        part_rows.append({
            'Participant': pid,
            'N_Tests': len(sub_df),
            'Avg_Score_Mean': sub_df['Score_Pct'].mean(),
            'Avg_Conf_Mean': sub_df['Avg_Confidence'].mean(),
            'Pre_Reading': phases.get('Pre-Reading', float('nan')),
            'Post_Reading': phases.get('Post-Reading', float('nan')),
            'Pre_Tutoring': phases.get('Pre-Tutoring', float('nan')),
            'Post_Tutoring': phases.get('Post-Tutoring', float('nan')),
            'Reading_Gain': phases.get('Post-Reading', float('nan')) - phases.get('Pre-Reading', float('nan')),
            'Tutoring_Gain': sub_p['Score_Gain'].mean() if len(sub_p) else float('nan'),
        })
    pd.DataFrame(part_rows).to_csv(STATS_DIR / 'participant_summary.csv', index=False,
                                   float_format='%.3f')

    # --- Tutoring gain by medium × topic ---
    mt_rows = []
    for m in MEDIUM_ORDER:
        for topic in df['Topic'].unique():
            sub = paired[(paired['Medium'] == m) & (paired['Topic'] == topic)]
            mt_rows.append({
                'Medium': m,
                'Topic': topic,
                'N': len(sub),
                'Score_Gain_Mean': sub['Score_Gain'].mean() if len(sub) else float('nan'),
                'Score_Gain_SD': sub['Score_Gain'].std() if len(sub) else float('nan'),
                'Score_Gain_SEM': sub['Score_Gain'].sem() if len(sub) else float('nan'),
            })
    pd.DataFrame(mt_rows).to_csv(STATS_DIR / 'tutoring_by_medium_topic.csv', index=False,
                                 float_format='%.3f')

    print(f"  Stats exported to: {STATS_DIR}")
|
||
|
||
|
||
# =============================================================================
|
||
# MAIN
|
||
# =============================================================================
|
||
|
||
def main():
    """Load the data, render every plot section and export the statistics.

    Builds the per-participant ``merged`` frame (tutoring aggregates, mean
    total gain and personality scores) used by the section E plots, then
    runs each plot function in section order, printing one line per plot.
    """
    print("Loading data...")
    df = load_data()
    paired = build_paired_tutoring(df)
    personality = load_personality()

    # Build merged for personality analysis
    p_agg = paired.groupby('Participant').agg(
        Mean_Score_Gain=('Score_Gain', 'mean'), Mean_Conf_Gain=('Conf_Gain', 'mean'),
        Mean_Pre_Score=('Pre_Score', 'mean'), Mean_Post_Score=('Post_Score', 'mean'),
        Mean_Pre_Conf=('Pre_Conf', 'mean'), Mean_Post_Conf=('Post_Conf', 'mean'),
    ).reset_index()

    # Total gain: Post-Tutoring minus Pre-Reading score, averaged per participant
    pre_r = df[df['Zeitpunkt'] == 'Pre-Reading'][['Participant', 'Topic', 'Medium', 'Score_Pct']].copy()
    pre_r.columns = ['Participant', 'Topic', 'Medium', 'PreRead']
    post_t = df[df['Zeitpunkt'] == 'Post-Tutoring'][['Participant', 'Topic', 'Medium', 'Score_Pct']].copy()
    post_t.columns = ['Participant', 'Topic', 'Medium', 'PostTutor']
    total = pre_r.merge(post_t, on=['Participant', 'Topic', 'Medium'])
    total['TotalGain'] = total['PostTutor'] - total['PreRead']
    tg = total.groupby('Participant')['TotalGain'].mean().reset_index()
    tg.columns = ['Participant', 'Mean_Total_Gain']
    p_agg = p_agg.merge(tg, on='Participant', how='left')
    # Inner join: only participants with personality data reach section E.
    merged = p_agg.merge(personality, on='Participant', how='inner')

    print(f"  {len(df)} test entries, {paired['Participant'].nunique()} participants, "
          f"{len(merged)} with personality data\n")

    # Generate all plots
    sections = [
        ("A. Overall Learning Trajectory", [
            ("A1", "Overall trajectory (score + confidence)", lambda: plot_A1_trajectory(df)),
            ("A2", "Trajectory by medium", lambda: plot_A2_trajectory_by_medium(df)),
            ("A3", "Trajectory by topic", lambda: plot_A3_trajectory_by_topic(df)),
            ("A4", "Participant-level heatmaps", lambda: plot_A4_heatmap(df)),
        ]),
        ("B. Tutoring Phase Deep-Dive", [
            ("B1", "Paired slopes by medium (with stats)", lambda: plot_B1_tutoring_slopes_by_medium(paired)),
            ("B2", "Paired slopes by topic (with stats)", lambda: plot_B2_tutoring_slopes_by_topic(paired)),
            ("B3", "Tutoring gain by medium (effect sizes)", lambda: plot_B3_tutoring_gain_by_medium(paired)),
            ("B4", "Medium x topic interaction", lambda: plot_B4_tutoring_medium_topic(paired)),
            ("B5", "Tutoring effectiveness dashboard", lambda: plot_B5_tutoring_dashboard(paired)),
        ]),
        ("C. Start-to-Finish Gains", [
            ("C1", "Pre-Reading to Post-Tutoring paired", lambda: plot_C1_start_to_finish(df)),
            ("C2", "Learning gains overview", lambda: plot_C2_learning_gains(df)),
        ]),
        ("D. Confidence Analysis", [
            ("D1", "Confidence vs test score scatter", lambda: plot_D1_confidence_vs_score(df)),
            ("D2", "Change in confidence vs change in score", lambda: plot_D2_delta_conf_vs_score(df)),
            ("D3", "Confidence calibration", lambda: plot_D3_calibration(df)),
        ]),
        ("E. Personality Correlations", [
            ("E1", "Big Five vs tutoring outcomes heatmap", lambda: plot_E1_personality_correlations(merged)),
            ("E2", "Trait vs tutoring score gain", lambda: plot_E2_trait_vs_score_gain(merged)),
        ]),
    ]

    for section_name, plots in sections:
        print(f"{section_name}")
        for code, desc, fn in plots:
            fn()
            print(f"  [{code}] {desc}")

    # Count the plots from the table above so the message cannot go stale
    # when a plot is added or removed.
    n_plots = sum(len(plots) for _, plots in sections)
    print(f"\n{n_plots} plots saved to: {PLOT_DIR}")

    print("\nExporting statistics...")
    export_stats(df, paired)
    print("Done.")
|
||
|
||
|
||
if __name__ == "__main__":
    # Run the full pipeline only when executed as a script, not on import.
    main()
|