"""
generate_plots.py

Consolidated visualization script for VirTu-Eval experiment data.
Generates all plots into Data/plots/ organized by section:

    A. Overall Learning Trajectory (4 plots)
    B. Tutoring Phase Deep-Dive (5 plots)
    C. Start-to-Finish Gains (2 plots)
    D. Confidence Analysis (3 plots)
    E. Personality Correlations (2 plots)

Usage:
    python generate_plots.py
"""

import csv
import pandas as pd
import numpy as np
import matplotlib
# Select the non-interactive Agg backend before pyplot is imported, so the
# figures are only ever rendered to files.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import seaborn as sns
from pathlib import Path
from io import StringIO
from scipy import stats

# =============================================================================
# CONFIG
# =============================================================================
# All input and output lives in a "Data" directory next to this script.
BASE = Path(__file__).resolve().parent / "Data"
PLOT_DIR = BASE / "plots"
# NOTE: no parents=True, so this raises if the Data directory itself is missing.
PLOT_DIR.mkdir(exist_ok=True)
STATS_DIR = BASE / "stats"
STATS_DIR.mkdir(exist_ok=True)

# Measurement points in chronological order, plus the short labels used on axes.
PHASE_ORDER = ['Pre-Reading', 'Post-Reading', 'Pre-Tutoring', 'Post-Tutoring']
PHASE_LABELS = ['Pre-Read', 'Post-Read', 'Pre-Tutor', 'Post-Tutor']
PHASE_SHORT = dict(zip(PHASE_ORDER, PHASE_LABELS))

# Tutoring media and topics: fixed display order and one colour per level.
MEDIUM_ORDER = ['Chat', 'Video', 'VR']
MEDIUM_COLORS = {'Chat': '#2196F3', 'Video': '#FF9800', 'VR': '#4CAF50'}
TOPIC_ORDER = ['Mendel', 'DNA-Replikation', 'Ökologie']
TOPIC_COLORS = {'Mendel': '#E91E63', 'DNA-Replikation': '#9C27B0', 'Ökologie': '#009688'}

# Per trait: the questionnaire item numbers that belong to it ('items') and the
# subset of those that is reverse-scored ('reverse').
BFI_TRAITS = {
    'Neuroticism': {'items': [1, 2, 3], 'reverse': [3]},
    'Extraversion': {'items': [4, 5, 6], 'reverse': [6]},
    'Openness': {'items': [7, 8, 9], 'reverse': []},
    'Agreeableness': {'items': [10, 11, 12], 'reverse': [10]},
    'Conscientiousness': {'items': [13, 14, 15], 'reverse': [14]},
}
TRAIT_ORDER = list(BFI_TRAITS.keys())
TRAIT_COLORS = {'Neuroticism': '#E53935', 'Extraversion': '#FB8C00', 'Openness': '#43A047',
                'Agreeableness': '#1E88E5', 'Conscientiousness': '#8E24AA'}

# Global seaborn theme shared by every figure.
sns.set_theme(style="whitegrid", font_scale=1.05)
plt.rcParams['figure.dpi'] = 150
plt.rcParams['savefig.bbox'] = 'tight'


def cohens_d(pre, post):
    """Return the paired-samples effect size: mean(post - pre) / sd(post - pre).

    Returns 0 when the standard deviation of the differences is zero or
    undefined (NaN), e.g. for fewer than two pairs.
    """
    diff = post - pre
    sd = diff.std()
    return diff.mean() / sd if sd > 0 else 0


def _sig_label(p, ns='n.s.'):
    """Map a p-value to '***' / '**' / '*', or to `ns` when p >= .05 (or NaN)."""
    if p < .001:
        return '***'
    if p < .01:
        return '**'
    if p < .05:
        return '*'
    return ns


# =============================================================================
# DATA LOADING
# =============================================================================
def load_data():
    """Load Data/test_scores_all.csv and add the derived phase/participant columns.

    Adds 'Phase' and 'Phase_Label' (ordered categoricals), 'Phase_Idx'
    (position of the phase in PHASE_ORDER) and 'P_Num' (the first run of
    digits in 'Participant', as int).
    """
    df = pd.read_csv(BASE / "test_scores_all.csv", encoding="utf-8-sig")
    # Normalize typo "Pre-Tutor" -> "Pre-Tutoring"
    df['Zeitpunkt'] = df['Zeitpunkt'].str.strip().replace('Pre-Tutor', 'Pre-Tutoring')
    df['Phase'] = pd.Categorical(df['Zeitpunkt'], categories=PHASE_ORDER, ordered=True)
    df['Phase_Label'] = pd.Categorical(
        df['Zeitpunkt'].map(PHASE_SHORT), categories=PHASE_LABELS, ordered=True)
    df['Phase_Idx'] = df['Zeitpunkt'].map({p: i for i, p in enumerate(PHASE_ORDER)})
    # expand=False yields a Series (not a one-column DataFrame) for the single group.
    df['P_Num'] = df['Participant'].str.extract(r'(\d+)', expand=False).astype(int)
    return df


def build_paired_tutoring(df):
    """Return one row per (Participant, Topic, Medium) with pre/post tutoring values.

    Columns: Pre_Score, Pre_Conf, Post_Score, Post_Conf, Score_Gain, Conf_Gain
    and P_Num. Only combinations present at both measurement points survive
    the (inner) merge.
    """
    keys = ['Participant', 'Topic', 'Medium']
    measures = ['Score_Pct', 'Avg_Confidence']
    pre = df.loc[df['Zeitpunkt'] == 'Pre-Tutoring', keys + measures].copy()
    post = df.loc[df['Zeitpunkt'] == 'Post-Tutoring', keys + measures].copy()
    pre.columns = keys + ['Pre_Score', 'Pre_Conf']
    post.columns = keys + ['Post_Score', 'Post_Conf']
    paired = pre.merge(post, on=keys)
    paired['Score_Gain'] = paired['Post_Score'] - paired['Pre_Score']
    paired['Conf_Gain'] = paired['Post_Conf'] - paired['Pre_Conf']
    paired['P_Num'] = paired['Participant'].str.extract(r'(\d+)', expand=False).astype(int)
    return paired


def load_personality():
    """Load Data/Final-Questionnaire.csv and score the traits in BFI_TRAITS.

    The first row is treated as a header. In each data row, columns 1-15 hold
    the item answers and the last column holds the participant id (prefixed
    with 'P' if it does not already start with it). Items listed under
    'reverse' are scored as ``8 - answer``; a trait score is the mean of its
    parseable items, or NaN when none is parseable.

    Returns a DataFrame with 'Participant' and one column per trait. Blank
    rows and rows without a participant id are skipped.
    """
    path = BASE / "Final-Questionnaire.csv"
    # newline='' is the csv-module recommended way to open a file for reading.
    with open(path, encoding="utf-8-sig", newline='') as f:
        rows = list(csv.reader(f))

    records = []
    for row in rows[1:]:  # rows[0] is the header; slicing is safe on an empty file
        if not row:
            # csv.reader yields [] for blank lines; row[-1] would raise IndexError.
            continue
        pid = row[-1].strip()
        if not pid:
            continue
        pid = pid if pid.startswith('P') else f'P{pid}'

        items = {}
        for i in range(1, 16):
            try:
                items[i] = int(row[i].strip())
            except (ValueError, IndexError):
                # Unanswered / non-numeric / missing column -> treated as missing.
                items[i] = np.nan

        traits = {}
        for trait, info in BFI_TRAITS.items():
            vals = []
            for it in info['items']:
                v = items.get(it, np.nan)
                if pd.notna(v):
                    vals.append(8 - v if it in info['reverse'] else v)
            traits[trait] = np.mean(vals) if vals else np.nan

        rec = {'Participant': pid}
        rec.update(traits)
        records.append(rec)
    return pd.DataFrame(records)


# =============================================================================
# A. OVERALL LEARNING TRAJECTORY
# =============================================================================
def plot_A1_trajectory(df):
    """Plot mean score (left axis) and mean confidence (right axis) per phase.

    Error bars are 1.96 * SEM. Saves A1_trajectory.png.
    """
    fig, ax1 = plt.subplots(figsize=(10, 6))
    means = df.groupby('Phase_Label', observed=True).agg(
        S=('Score_Pct', 'mean'), S_se=('Score_Pct', 'sem'),
        C=('Avg_Confidence', 'mean'), C_se=('Avg_Confidence', 'sem'),
    ).reindex(PHASE_LABELS)
    x = np.arange(len(PHASE_LABELS))
    c1, c2 = '#1976D2', '#E65100'

    ax1.errorbar(x, means['S'], yerr=means['S_se']*1.96, color=c1, marker='o', markersize=10,
                 linewidth=2.5, capsize=5, capthick=2, label='Score %', zorder=5)
    ax1.set_ylabel('Test Score (%)', color=c1, fontsize=13)
    ax1.set_ylim(30, 100)
    ax1.tick_params(axis='y', labelcolor=c1)

    ax2 = ax1.twinx()
    ax2.errorbar(x, means['C'], yerr=means['C_se']*1.96, color=c2, marker='s', markersize=10,
                 linewidth=2.5, capsize=5, capthick=2, linestyle='--', label='Confidence', zorder=5)
    ax2.set_ylabel('Avg Confidence (1-7)', color=c2, fontsize=13)
    ax2.set_ylim(1, 7)
    ax2.tick_params(axis='y', labelcolor=c2)

    ax1.set_xticks(x)
    ax1.set_xticklabels(PHASE_LABELS, fontsize=12)
    # Value labels: score above its marker, confidence below its marker.
    for i, row in means.iterrows():
        idx = PHASE_LABELS.index(i)
        ax1.annotate(f'{row["S"]:.1f}%', (idx, row['S']), textcoords="offset points",
                     xytext=(0, 14), ha='center', fontsize=10, color=c1, fontweight='bold')
        ax2.annotate(f'{row["C"]:.2f}', (idx, row['C']), textcoords="offset points",
                     xytext=(0, -18), ha='center', fontsize=10, color=c2, fontweight='bold')

    # One combined legend for both y-axes.
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax1.legend(h1+h2, l1+l2, loc='lower right', fontsize=11)

    # Brackets below the x-axis marking the reading and the tutoring phase.
    ax1.annotate('', xy=(0.32,-0.12), xytext=(0,-0.12),
                 arrowprops=dict(arrowstyle='<->',color='gray',lw=1.5),
                 annotation_clip=False, xycoords='axes fraction')
    ax1.annotate('', xy=(1,-0.12), xytext=(0.68,-0.12),
                 arrowprops=dict(arrowstyle='<->',color='gray',lw=1.5),
                 annotation_clip=False, xycoords='axes fraction')
    ax1.text(0.16,-0.17,'Reading Phase',transform=ax1.transAxes,ha='center',fontsize=10,color='gray')
    ax1.text(0.84,-0.17,'Tutoring Phase',transform=ax1.transAxes,ha='center',fontsize=10,color='gray')

    fig.suptitle('Overall Learning Trajectory', fontsize=15, fontweight='bold')
    fig.savefig(PLOT_DIR / 'A1_trajectory.png', bbox_inches='tight')
    plt.close(fig)


def _plot_trajectories_by_group(df, column, order, colors, legend_title, suptitle, filename):
    """Draw score and confidence trajectories per phase, one line per level of `column`.

    Shared implementation of plot_A2_trajectory_by_medium and
    plot_A3_trajectory_by_topic; saves the figure as PLOT_DIR / filename.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
    x = np.arange(len(PHASE_LABELS))
    off = [-0.1, 0, 0.1]  # horizontal dodge so the error bars do not overlap
    for j, level in enumerate(order):
        by_phase = df[df[column] == level].groupby('Phase_Label', observed=True)
        ms = by_phase['Score_Pct'].agg(['mean','sem']).reindex(PHASE_LABELS)
        ax1.errorbar(x+off[j], ms['mean'], yerr=ms['sem']*1.96, color=colors[level],
                     marker='o', markersize=8, linewidth=2, capsize=4, label=level)
        mc = by_phase['Avg_Confidence'].agg(['mean','sem']).reindex(PHASE_LABELS)
        ax2.errorbar(x+off[j], mc['mean'], yerr=mc['sem']*1.96, color=colors[level],
                     marker='s', markersize=8, linewidth=2, capsize=4, linestyle='--', label=level)

    ax1.set_xticks(x)
    ax1.set_xticklabels(PHASE_LABELS)
    ax1.set_ylabel('Test Score (%)')
    ax1.set_ylim(30,100)
    ax1.legend(title=legend_title)
    ax1.set_title('Score')

    ax2.set_xticks(x)
    ax2.set_xticklabels(PHASE_LABELS)
    ax2.set_ylabel('Avg Confidence (1-7)')
    ax2.set_ylim(1,7)
    ax2.legend(title=legend_title)
    ax2.set_title('Confidence')

    fig.suptitle(suptitle, fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / filename)
    plt.close(fig)


def plot_A2_trajectory_by_medium(df):
    """Plot score and confidence trajectories per medium. Saves A2_trajectory_by_medium.png."""
    _plot_trajectories_by_group(df, 'Medium', MEDIUM_ORDER, MEDIUM_COLORS, 'Medium',
                                'Learning Trajectories by Medium', 'A2_trajectory_by_medium.png')


def plot_A3_trajectory_by_topic(df):
    """Plot score and confidence trajectories per topic. Saves A3_trajectory_by_topic.png."""
    _plot_trajectories_by_group(df, 'Topic', TOPIC_ORDER, TOPIC_COLORS, 'Topic',
                                'Learning Trajectories by Topic', 'A3_trajectory_by_topic.png')


def plot_A4_heatmap(df):
    """Plot participant x phase heatmaps of mean score and mean confidence.

    Rows are sorted by the integer after the first character of the
    participant id. Saves A4_heatmap.png.
    """
    def _participant_by_phase(value):
        pv = df.pivot_table(index='Participant', columns='Zeitpunkt', values=value, aggfunc='mean')
        pv = pv.reindex(columns=PHASE_ORDER).reindex(sorted(pv.index, key=lambda x: int(x[1:])))
        pv.columns = PHASE_LABELS
        return pv

    pivot_s = _participant_by_phase('Score_Pct')
    pivot_c = _participant_by_phase('Avg_Confidence')

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 8))
    sns.heatmap(pivot_s, annot=True, fmt='.0f', cmap='RdYlGn', vmin=20, vmax=100, ax=ax1,
                linewidths=.5, cbar_kws={'label':'Score %'})
    ax1.set_title('Test Scores')
    ax1.set_ylabel('Participant')
    sns.heatmap(pivot_c, annot=True, fmt='.1f', cmap='YlOrRd', vmin=1, vmax=7, ax=ax2,
                linewidths=.5, cbar_kws={'label':'Confidence (1-7)'})
    ax2.set_title('Confidence')
    ax2.set_ylabel('')
    fig.suptitle('Participant-Level Heatmaps', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'A4_heatmap.png')
    plt.close(fig)


# =============================================================================
# B. TUTORING PHASE DEEP-DIVE
# =============================================================================
def plot_B1_tutoring_slopes_by_medium(paired):
    """Plot pre->post tutoring slopes, one panel per medium, lines coloured by topic.

    Each panel also shows the medium mean and a paired t-test / Cohen's d
    summary. Saves B1_tutoring_slopes_by_medium.png.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
    for ax, medium in zip(axes, MEDIUM_ORDER):
        sub = paired[paired['Medium'] == medium].sort_values('P_Num')
        for _, row in sub.iterrows():
            ax.plot([0,1], [row['Pre_Score'], row['Post_Score']], color=TOPIC_COLORS[row['Topic']],
                    alpha=0.5, linewidth=1.5, marker='o', markersize=5)
            ax.annotate(row['Participant'], (1.02, row['Post_Score']), fontsize=7,
                        va='center', alpha=0.6)

        pre_m, post_m = sub['Pre_Score'].mean(), sub['Post_Score'].mean()
        ax.plot([0,1], [pre_m, post_m], color=MEDIUM_COLORS[medium], linewidth=4, marker='D',
                markersize=12, zorder=10, markeredgecolor='white', markeredgewidth=2)

        # NOTE: ttest_rel(pre, post) tests pre - post, so t is negative when
        # scores improve, while the printed gain and d are post - pre.
        t, p = stats.ttest_rel(sub['Pre_Score'], sub['Post_Score'])
        d = cohens_d(sub['Pre_Score'], sub['Post_Score'])
        sig = _sig_label(p)
        ax.text(0.5, 0.02, f'Gain: {post_m-pre_m:+.1f}% d={d:.2f}\nt={t:.2f}, p={p:.3f} {sig}',
                transform=ax.transAxes, ha='center', fontsize=10,
                bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))
        ax.set_xticks([0,1])
        ax.set_xticklabels(['Pre-Tutoring','Post-Tutoring'], fontsize=11)
        ax.set_title(medium, fontsize=14, fontweight='bold', color=MEDIUM_COLORS[medium])
        ax.set_ylim(-5, 110)

    axes[0].set_ylabel('Test Score (%)', fontsize=12)
    legend_el = [Line2D([0],[0], color=TOPIC_COLORS[t], lw=2, marker='o', ms=6, label=t)
                 for t in TOPIC_ORDER]
    legend_el.append(Line2D([0],[0], color='gray', lw=4, marker='D', ms=8, label='Medium Mean'))
    fig.legend(handles=legend_el, loc='upper center', ncol=4, fontsize=10,
               bbox_to_anchor=(0.5, 0.02))
    fig.suptitle('Tutoring: Individual Trajectories by Medium', fontsize=15, fontweight='bold')
    fig.tight_layout(rect=[0,0.05,1,0.96])
    fig.savefig(PLOT_DIR / 'B1_tutoring_slopes_by_medium.png')
    plt.close(fig)


def plot_B2_tutoring_slopes_by_topic(paired):
    """Plot pre->post tutoring slopes, one panel per topic, lines coloured by medium.

    Each panel also shows one mean line per medium and an overall paired
    t-test / Cohen's d summary. Saves B2_tutoring_slopes_by_topic.png.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
    for ax, topic in zip(axes, TOPIC_ORDER):
        sub = paired[paired['Topic'] == topic].sort_values('P_Num')
        for _, row in sub.iterrows():
            ax.plot([0,1], [row['Pre_Score'], row['Post_Score']], color=MEDIUM_COLORS[row['Medium']],
                    alpha=0.5, linewidth=1.5, marker='o', markersize=5)
            ax.annotate(row['Participant'], (1.02, row['Post_Score']), fontsize=7,
                        va='center', alpha=0.6)

        for medium in MEDIUM_ORDER:
            msub = sub[sub['Medium'] == medium]
            if len(msub) > 0:
                pm, qm = msub['Pre_Score'].mean(), msub['Post_Score'].mean()
                ax.plot([0,1], [pm, qm], color=MEDIUM_COLORS[medium], linewidth=3.5, marker='D',
                        markersize=10, zorder=10, markeredgecolor='white', markeredgewidth=2,
                        label=f'{medium} ({qm-pm:+.1f}%)')

        _, p = stats.ttest_rel(sub['Pre_Score'], sub['Post_Score'])
        d = cohens_d(sub['Pre_Score'], sub['Post_Score'])
        sig = _sig_label(p)
        ax.text(0.5, 0.02, f'Overall: {sub["Score_Gain"].mean():+.1f}% d={d:.2f}\np={p:.3f} {sig}',
                transform=ax.transAxes, ha='center', fontsize=10,
                bbox=dict(boxstyle='round,pad=0.4', facecolor='lightyellow', alpha=0.9))
        ax.set_xticks([0,1])
        ax.set_xticklabels(['Pre-Tutoring','Post-Tutoring'], fontsize=11)
        ax.set_title(topic, fontsize=14, fontweight='bold', color=TOPIC_COLORS[topic])
        ax.set_ylim(-5, 110)
        ax.legend(fontsize=9, loc='upper left')

    axes[0].set_ylabel('Test Score (%)', fontsize=12)
    fig.suptitle('Tutoring: Individual Trajectories by Topic', fontsize=15, fontweight='bold')
    fig.tight_layout(rect=[0,0,1,0.96])
    fig.savefig(PLOT_DIR / 'B2_tutoring_slopes_by_topic.png')
    plt.close(fig)


def plot_B3_tutoring_gain_by_medium(paired):
    """Plot mean score and confidence gains per medium with jittered individual points.

    Bars carry 1.96 * SEM error bars and are labelled with the mean gain and
    Cohen's d. Saves B3_tutoring_gain_by_medium.png.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6.5))
    rng = np.random.default_rng(42)  # fixed seed -> reproducible jitter
    for i, m in enumerate(MEDIUM_ORDER):
        sub = paired[paired['Medium'] == m]
        # The same jitter is used in both panels so points line up across them.
        jit = rng.uniform(-0.15, 0.15, len(sub))
        xs = np.full(len(sub),i)+jit

        g, se = sub['Score_Gain'].mean(), sub['Score_Gain'].sem()
        d = cohens_d(sub['Pre_Score'], sub['Post_Score'])
        ax1.bar(i, g, color=MEDIUM_COLORS[m], alpha=0.6, width=0.6, yerr=se*1.96, capsize=6,
                edgecolor='white', lw=1.5)
        ax1.scatter(xs, sub['Score_Gain'], color=MEDIUM_COLORS[m], s=40, alpha=0.7,
                    edgecolors='white', lw=0.5, zorder=5)
        ax1.text(i, g+se*1.96+2, f'{g:+.1f}%\nd={d:.2f}', ha='center', fontsize=10,
                 fontweight='bold')

        gc, sec = sub['Conf_Gain'].mean(), sub['Conf_Gain'].sem()
        dc = cohens_d(sub['Pre_Conf'], sub['Post_Conf'])
        ax2.bar(i, gc, color=MEDIUM_COLORS[m], alpha=0.6, width=0.6, yerr=sec*1.96, capsize=6,
                edgecolor='white', lw=1.5)
        ax2.scatter(xs, sub['Conf_Gain'], color=MEDIUM_COLORS[m], s=40, alpha=0.7,
                    edgecolors='white', lw=0.5, zorder=5)
        ax2.text(i, gc+sec*1.96+0.15, f'{gc:+.2f}\nd={dc:.2f}', ha='center', fontsize=10,
                 fontweight='bold')

    ax1.axhline(0, color='gray', lw=1)
    ax1.set_xticks(range(3))
    ax1.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    ax1.set_ylabel('Score Gain (%)')
    ax1.set_title('Score Gain')

    ax2.axhline(0, color='gray', lw=1)
    ax2.set_xticks(range(3))
    ax2.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    ax2.set_ylabel('Confidence Gain')
    ax2.set_title('Confidence Gain')

    fig.suptitle('Tutoring Gains by Medium (with effect sizes)', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'B3_tutoring_gain_by_medium.png')
    plt.close(fig)


def plot_B4_tutoring_medium_topic(paired):
    """Plot the medium x topic mean score gain as a heatmap and as grouped bars.

    Saves B4_tutoring_medium_topic.png.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    pivot = paired.pivot_table(index='Medium', columns='Topic', values='Score_Gain', aggfunc='mean')
    pivot = pivot.reindex(index=MEDIUM_ORDER, columns=TOPIC_ORDER)
    sns.heatmap(pivot, annot=True, fmt='.1f', cmap='RdYlGn', center=0, ax=ax1, linewidths=1,
                vmin=-10, vmax=30, cbar_kws={'label':'Score Gain %'})
    ax1.set_title('Mean Tutoring Score Gain')
    ax1.set_ylabel('Medium')

    x = np.arange(3)
    w = 0.25
    for j, t in enumerate(TOPIC_ORDER):
        cells = [paired[(paired['Medium']==m)&(paired['Topic']==t)]['Score_Gain']
                 for m in MEDIUM_ORDER]
        means = [cell.mean() for cell in cells]
        sems = [cell.sem()*1.96 for cell in cells]
        ax2.bar(x+j*w-w, means, w, yerr=sems, capsize=3, color=TOPIC_COLORS[t], alpha=0.8,
                label=t, edgecolor='white')
    ax2.axhline(0, color='gray', lw=0.8)
    ax2.set_xticks(x)
    ax2.set_xticklabels(MEDIUM_ORDER, fontsize=12)
    ax2.set_ylabel('Score Gain (%)')
    ax2.legend(title='Topic', fontsize=9)
    ax2.set_title('Gain by Medium and Topic')

    fig.suptitle('Medium x Topic Interaction', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'B4_tutoring_medium_topic.png')
    plt.close(fig)


def plot_B5_tutoring_dashboard(paired):
    """Plot a 2x3 dashboard summarising tutoring effectiveness per medium.

    Panels: A) absolute scores, B) absolute confidence, C) gains with effect
    sizes, D) gain distributions, E) improved/same/declined shares,
    F) statistics table. Saves B5_tutoring_dashboard.png.
    """
    fig = plt.figure(figsize=(18, 10))
    gs = fig.add_gridspec(2, 3, hspace=0.35, wspace=0.3)
    x = np.arange(3)
    w = 0.35
    # Slice once per medium instead of re-filtering in every panel.
    by_medium = [paired[paired['Medium']==m] for m in MEDIUM_ORDER]
    medium_colors = [MEDIUM_COLORS[m] for m in MEDIUM_ORDER]

    # A) Absolute scores
    ax = fig.add_subplot(gs[0, 0])
    pre_m = [sub['Pre_Score'].mean() for sub in by_medium]
    post_m = [sub['Post_Score'].mean() for sub in by_medium]
    pre_se = [sub['Pre_Score'].sem()*1.96 for sub in by_medium]
    post_se = [sub['Post_Score'].sem()*1.96 for sub in by_medium]
    ax.bar(x-w/2, pre_m, w, yerr=pre_se, capsize=4, color='#BBDEFB', edgecolor='#1976D2',
           lw=1.5, label='Pre')
    ax.bar(x+w/2, post_m, w, yerr=post_se, capsize=4, color=medium_colors, alpha=0.8,
           edgecolor='white', lw=1.5, label='Post')
    ax.set_xticks(x)
    ax.set_xticklabels(MEDIUM_ORDER)
    ax.set_ylabel('Score (%)')
    ax.set_ylim(40,100)
    ax.legend(fontsize=9)
    ax.set_title('A) Absolute Scores', fontweight='bold')

    # B) Absolute confidence
    ax = fig.add_subplot(gs[0, 1])
    pre_c = [sub['Pre_Conf'].mean() for sub in by_medium]
    post_c = [sub['Post_Conf'].mean() for sub in by_medium]
    pre_cse = [sub['Pre_Conf'].sem()*1.96 for sub in by_medium]
    post_cse = [sub['Post_Conf'].sem()*1.96 for sub in by_medium]
    ax.bar(x-w/2, pre_c, w, yerr=pre_cse, capsize=4, color='#FFE0B2', edgecolor='#E65100',
           lw=1.5, label='Pre')
    ax.bar(x+w/2, post_c, w, yerr=post_cse, capsize=4, color=medium_colors, alpha=0.8,
           edgecolor='white', lw=1.5, label='Post')
    ax.set_xticks(x)
    ax.set_xticklabels(MEDIUM_ORDER)
    ax.set_ylabel('Confidence (1-7)')
    ax.set_ylim(1,7)
    ax.legend(fontsize=9)
    ax.set_title('B) Absolute Confidence', fontweight='bold')

    # C) Gains + effect sizes
    ax = fig.add_subplot(gs[0, 2])
    for i, (m, sub) in enumerate(zip(MEDIUM_ORDER, by_medium)):
        g, se = sub['Score_Gain'].mean(), sub['Score_Gain'].sem()
        _, p = stats.ttest_rel(sub['Pre_Score'], sub['Post_Score'])
        d = cohens_d(sub['Pre_Score'], sub['Post_Score'])
        ax.bar(i, g, color=MEDIUM_COLORS[m], alpha=0.7, yerr=se*1.96, capsize=5, width=0.6)
        sig = _sig_label(p)
        ax.text(i, g+se*1.96+1.5, f'{g:+.1f}%\nd={d:.2f} {sig}', ha='center', fontsize=10,
                fontweight='bold')
    ax.axhline(0, color='gray', lw=1)
    ax.set_xticks(range(3))
    ax.set_xticklabels(MEDIUM_ORDER)
    ax.set_ylabel('Score Gain (%)')
    ax.set_title('C) Gains + Effect Sizes', fontweight='bold')

    # D) Gain distributions
    ax = fig.add_subplot(gs[1, 0])
    for i, (m, sub) in enumerate(zip(MEDIUM_ORDER, by_medium)):
        bp = ax.boxplot(sub['Score_Gain'], positions=[i], widths=0.5, patch_artist=True,
                        showmeans=True,
                        meanprops=dict(marker='D', markerfacecolor='black', markersize=6))
        bp['boxes'][0].set_facecolor(MEDIUM_COLORS[m])
        bp['boxes'][0].set_alpha(0.5)
    ax.axhline(0, color='gray', lw=0.8, ls='--')
    ax.set_xticks(range(3))
    ax.set_xticklabels(MEDIUM_ORDER)
    ax.set_ylabel('Score Gain (%)')
    ax.set_title('D) Gain Distributions', fontweight='bold')

    # E) Improved/same/declined
    ax = fig.add_subplot(gs[1, 1])
    for i, sub in enumerate(by_medium):
        imp = (sub['Score_Gain']>0).sum()
        same = (sub['Score_Gain']==0).sum()
        dec = (sub['Score_Gain']<0).sum()
        tot = len(sub)
        ax.barh([i-0.15, i, i+0.15], [imp/tot*100, same/tot*100, dec/tot*100], height=0.12,
                color=['#43A047','#9E9E9E','#E53935'], alpha=0.8)
        ax.text(imp/tot*100+1, i-0.15, f'{imp}/{tot}', va='center', fontsize=9)
    ax.set_yticks(range(3))
    ax.set_yticklabels(MEDIUM_ORDER)
    ax.set_xlabel('% of Participants')
    ax.legend([mpatches.Patch(color='#43A047'), mpatches.Patch(color='#9E9E9E'),
               mpatches.Patch(color='#E53935')],
              ['Improved','Same','Declined'], fontsize=8, loc='lower right')
    ax.set_title('E) Improved / Same / Declined', fontweight='bold')

    # F) Stats table
    ax = fig.add_subplot(gs[1, 2])
    ax.axis('off')
    tdata = []
    for m, sub in zip(MEDIUM_ORDER, by_medium):
        g = sub['Score_Gain'].mean()
        _, p = stats.ttest_rel(sub['Pre_Score'], sub['Post_Score'])
        d = cohens_d(sub['Pre_Score'], sub['Post_Score'])
        n = len(sub)
        sig = _sig_label(p, ns='')  # the table leaves non-significant cells unmarked
        tdata.append([m, str(n), f'{sub["Pre_Score"].mean():.1f}', f'{sub["Post_Score"].mean():.1f}',
                      f'{g:+.1f}', f'{d:.2f}', f'{p:.3f}{sig}'])
    table = ax.table(cellText=tdata,
                     colLabels=['Medium','N','Pre M','Post M','Gain',"Cohen's d",'p-value'],
                     loc='center', cellLoc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1.0, 1.8)
    for j in range(7):
        table[0,j].set_facecolor('#E0E0E0')
        table[0,j].set_text_props(fontweight='bold')
    for i, m in enumerate(MEDIUM_ORDER):
        table[i+1,0].set_facecolor(MEDIUM_COLORS[m])
        table[i+1,0].set_text_props(color='white', fontweight='bold')
    ax.set_title('F) Statistical Summary', fontweight='bold', pad=20)

    fig.suptitle('Tutoring Effectiveness Dashboard', fontsize=16, fontweight='bold')
    fig.savefig(PLOT_DIR / 'B5_tutoring_dashboard.png')
    plt.close(fig)


# =============================================================================
# C. START-TO-FINISH GAINS
# =============================================================================
def plot_C1_start_to_finish(df):
    """Plot Pre-Reading -> Post-Tutoring score slopes and the total-gain histograms.

    Saves C1_start_to_finish.png.
    """
    keys = ['Participant','Topic','Medium']
    pre_r = df[df['Zeitpunkt']=='Pre-Reading'][keys + ['Score_Pct']].copy()
    post_t = df[df['Zeitpunkt']=='Post-Tutoring'][keys + ['Score_Pct']].copy()
    pre_r.columns = keys + ['Start']
    post_t.columns = keys + ['End']
    p = pre_r.merge(post_t, on=keys)
    p['Gain'] = p['End'] - p['Start']

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 7))
    for _, row in p.iterrows():
        ax1.plot([0,1], [row['Start'], row['End']], color=MEDIUM_COLORS[row['Medium']],
                 alpha=0.25, lw=1)
    for m in MEDIUM_ORDER:
        sub = p[p['Medium']==m]
        sm, em = sub['Start'].mean(), sub['End'].mean()
        ax1.plot([0,1], [sm, em], color=MEDIUM_COLORS[m], lw=3.5, marker='o', ms=10,
                 label=f'{m} ({em-sm:+.1f}%)', zorder=10)
    ax1.set_xticks([0,1])
    ax1.set_xticklabels(['Pre-Reading\n(Start)','Post-Tutoring\n(End)'], fontsize=12)
    ax1.set_ylabel('Test Score (%)')
    ax1.set_ylim(0,105)
    ax1.legend(title='Medium (total gain)', loc='lower right')
    ax1.set_title('Score Trajectory')

    for m in MEDIUM_ORDER:
        sub = p[p['Medium']==m]
        ax2.hist(sub['Gain'], bins=10, alpha=0.5, color=MEDIUM_COLORS[m],
                 label=f'{m} (M={sub["Gain"].mean():.1f}%)', edgecolor='white')
    ax2.axvline(0, color='gray', lw=1, ls='--')
    ax2.set_xlabel('Total Gain (%)')
    ax2.set_ylabel('Count')
    ax2.legend(title='Medium')
    ax2.set_title('Gain Distribution')

    fig.suptitle('Start to Finish: Pre-Reading to Post-Tutoring', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'C1_start_to_finish.png')
    plt.close(fig)


def plot_C2_learning_gains(df):
    """Plot reading vs tutoring gains (scatter), mean gains and total gain per medium.

    Saves C2_learning_gains.png.
    """
    pivot = df.pivot_table(index=['Participant','Topic','Medium'], columns='Zeitpunkt',
                           values='Score_Pct').reset_index()
    # .get(..., 0) keeps the arithmetic working if a phase column is absent.
    g = pd.DataFrame({
        'Medium': pivot['Medium'],
        'Reading': pivot.get('Post-Reading',0)-pivot.get('Pre-Reading',0),
        'Tutoring': pivot.get('Post-Tutoring',0)-pivot.get('Pre-Tutoring',0),
        'Total': pivot.get('Post-Tutoring',0)-pivot.get('Pre-Reading',0)})
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # Scatter
    ax = axes[0]
    for m in MEDIUM_ORDER:
        sub = g[g['Medium']==m]
        ax.scatter(sub['Reading'], sub['Tutoring'], color=MEDIUM_COLORS[m], s=60, alpha=0.7,
                   edgecolors='white', lw=0.5, label=m)
    ax.axhline(0, color='gray', lw=0.8, alpha=0.5)
    ax.axvline(0, color='gray', lw=0.8, alpha=0.5)
    ax.set_xlabel('Reading Gain (%)')
    ax.set_ylabel('Tutoring Gain (%)')
    ax.legend(title='Medium')
    ax.set_title('Reading vs Tutoring')

    # Bar
    ax = axes[1]
    gm = g.groupby('Medium')[['Reading','Tutoring']].agg(['mean','sem'])
    xp = np.arange(3)
    w = 0.35
    for i, (gt, c, l) in enumerate([('Reading','#1976D2','Reading'),
                                    ('Tutoring','#E65100','Tutoring')]):
        ms = [gm.loc[m,(gt,'mean')] for m in MEDIUM_ORDER]
        se = [gm.loc[m,(gt,'sem')]*1.96 for m in MEDIUM_ORDER]
        bars = ax.bar(xp+i*w-w/2, ms, w, yerr=se, color=c, alpha=0.8, capsize=4, label=l)
        for b, v in zip(bars, ms):
            ax.text(b.get_x()+b.get_width()/2, b.get_height()+1, f'{v:.1f}', ha='center',
                    fontsize=9)
    ax.set_xticks(xp)
    ax.set_xticklabels(MEDIUM_ORDER)
    ax.set_ylabel('Score Gain (%)')
    ax.axhline(0, color='gray', lw=0.8)
    ax.legend()
    ax.set_title('Mean Gains by Medium')

    # Total
    ax = axes[2]
    tm = g.groupby('Medium')['Total'].agg(['mean','sem'])
    bars = ax.bar(MEDIUM_ORDER, [tm.loc[m,'mean'] for m in MEDIUM_ORDER],
                  color=[MEDIUM_COLORS[m] for m in MEDIUM_ORDER], alpha=0.8,
                  yerr=[tm.loc[m,'sem']*1.96 for m in MEDIUM_ORDER], capsize=5)
    for b, m in zip(bars, MEDIUM_ORDER):
        ax.text(b.get_x()+b.get_width()/2, b.get_height()+1, f'{tm.loc[m,"mean"]:.1f}%',
                ha='center', fontsize=10, fontweight='bold')
    ax.set_ylabel('Total Gain (%)')
    ax.axhline(0, color='gray', lw=0.8)
    ax.set_title('Total Learning Gain')

    fig.suptitle('Learning Gains Overview', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'C2_learning_gains.png')
    plt.close(fig)


# =============================================================================
# D. CONFIDENCE ANALYSIS
# =============================================================================
def plot_D1_confidence_vs_score(df):
    """Scatter confidence against score: overall (with fit and r), by phase, by medium.

    Saves D1_confidence_vs_score.png.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)

    ax = axes[0]
    ax.scatter(df['Score_Pct'], df['Avg_Confidence'], alpha=0.4, s=40, c='#546E7A',
               edgecolors='white', lw=0.5)
    # Fit only on rows where both values are present. Taking the columns from
    # the filtered frame (rather than .loc on its index) stays correct even if
    # the index is not unique.
    valid = df[['Score_Pct','Avg_Confidence']].dropna()
    xr, yr = valid['Score_Pct'], valid['Avg_Confidence']
    if len(xr) > 2:
        z = np.polyfit(xr, yr, 1)
        xl = np.linspace(xr.min(), xr.max(), 100)
        ax.plot(xl, np.poly1d(z)(xl), 'r-', lw=2, alpha=0.8)
        r = np.corrcoef(xr, yr)[0,1]
        ax.text(0.05, 0.95, f'r = {r:.3f}', transform=ax.transAxes, fontsize=12, va='top',
                fontweight='bold', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    ax.set_xlabel('Test Score (%)')
    ax.set_ylabel('Avg Confidence (1-7)')
    ax.set_title('Overall')

    ax = axes[1]
    # Phases are drawn with matplotlib's default colour cycle. A palette used
    # to be zipped in here but was never passed to scatter; it has been removed
    # so the rendered figure stays unchanged.
    for phase in PHASE_ORDER:
        sub = df[df['Zeitpunkt']==phase]
        ax.scatter(sub['Score_Pct'], sub['Avg_Confidence'], alpha=0.5, s=40,
                   label=PHASE_SHORT[phase], edgecolors='white', lw=0.5)
    ax.legend(fontsize=9, title='Phase')
    ax.set_xlabel('Test Score (%)')
    ax.set_title('By Phase')

    ax = axes[2]
    for m in MEDIUM_ORDER:
        sub = df[df['Medium']==m]
        ax.scatter(sub['Score_Pct'], sub['Avg_Confidence'], alpha=0.5, s=40,
                   color=MEDIUM_COLORS[m], label=m, edgecolors='white', lw=0.5)
    ax.legend(fontsize=9, title='Medium')
    ax.set_xlabel('Test Score (%)')
    ax.set_title('By Medium')

    fig.suptitle('Confidence vs Test Score', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'D1_confidence_vs_score.png')
    plt.close(fig)


def plot_D2_delta_conf_vs_score(df):
    """Scatter confidence change against score change for reading, tutoring and total.

    Each panel shows a linear fit and Pearson r over all media.
    Saves D2_delta_conf_vs_score.png.
    """
    keys = ['Participant','Topic','Medium']
    ps = df.pivot_table(index=keys, columns='Zeitpunkt', values='Score_Pct')
    pc = df.pivot_table(index=keys, columns='Zeitpunkt', values='Avg_Confidence')
    # R_* = reading phase, T_* = tutoring phase, A_* = start to finish;
    # *_S = score change, *_C = confidence change.
    d = pd.DataFrame({
        'R_S': ps.get('Post-Reading',0)-ps.get('Pre-Reading',0),
        'R_C': pc.get('Post-Reading',0)-pc.get('Pre-Reading',0),
        'T_S': ps.get('Post-Tutoring',0)-ps.get('Pre-Tutoring',0),
        'T_C': pc.get('Post-Tutoring',0)-pc.get('Pre-Tutoring',0),
        'A_S': ps.get('Post-Tutoring',0)-ps.get('Pre-Reading',0),
        'A_C': pc.get('Post-Tutoring',0)-pc.get('Pre-Reading',0),
    }).reset_index().dropna()

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    panels = [('R_S','R_C','Reading Phase'),('T_S','T_C','Tutoring Phase'),('A_S','A_C','Total')]
    for ax, (sx, sy, title) in zip(axes, panels):
        for m in MEDIUM_ORDER:
            sub = d[d['Medium']==m]
            ax.scatter(sub[sx], sub[sy], color=MEDIUM_COLORS[m], s=50, alpha=0.6,
                       edgecolors='white', label=m)
        xv, yv = d[sx].values, d[sy].values
        if len(xv) > 2:
            z = np.polyfit(xv, yv, 1)
            xl = np.linspace(xv.min(), xv.max(), 100)
            ax.plot(xl, np.poly1d(z)(xl), 'r-', lw=1.5, alpha=0.7)
            r = np.corrcoef(xv, yv)[0,1]
            ax.text(0.05, 0.95, f'r = {r:.3f}', transform=ax.transAxes, fontsize=11, va='top',
                    fontweight='bold', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
        ax.axhline(0, color='gray', lw=0.8, alpha=0.5)
        ax.axvline(0, color='gray', lw=0.8, alpha=0.5)
        ax.set_xlabel('Score Change (%)')
        ax.set_ylabel('Confidence Change')
        ax.set_title(title)
        ax.legend(title='Medium', fontsize=8)

    fig.suptitle('Do Changes in Confidence Track Changes in Score?', fontsize=14,
                 fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'D2_delta_conf_vs_score.png')
    plt.close(fig)


def plot_D3_calibration(df):
    """Plot mean score per confidence rating overall and per coarse level by phase.

    Left: bars per rounded confidence rating (ratings with fewer than 3
    observations are dropped). Right: one line per phase over Low/Med/High
    confidence (phases with fewer than 5 observations are skipped).
    Saves D3_calibration.png.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    dc = df[['Score_Pct','Avg_Confidence','Zeitpunkt']].dropna().copy()
    dc['Bin'] = pd.cut(dc['Avg_Confidence'], bins=[0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5],
                       labels=list('1234567'))
    cal = dc.groupby('Bin', observed=True)['Score_Pct'].agg(['mean','sem','count'])
    cal = cal[cal['count']>=3]
    ax1.bar(cal.index.astype(str), cal['mean'], yerr=cal['sem']*1.96, capsize=4,
            color='#5C6BC0', alpha=0.8, edgecolor='white')
    for idx, row in cal.iterrows():
        ax1.text(idx, row['mean']+2, f'n={int(row["count"])}', ha='center', fontsize=8,
                 color='gray')
    ax1.set_xlabel('Confidence Rating')
    ax1.set_ylabel('Mean Test Score (%)')
    ax1.set_title('Overall Calibration')

    pcol = {'Pre-Reading':'#E8EAF6','Post-Reading':'#9FA8DA','Pre-Tutoring':'#5C6BC0',
            'Post-Tutoring':'#283593'}
    level_labels = ['Low (1-2)','Med (3-4)','High (5-7)']
    for phase in PHASE_ORDER:
        sub = dc[dc['Zeitpunkt']==phase]
        if len(sub) < 5:
            continue
        bins = pd.cut(sub['Avg_Confidence'], bins=[0.5,2.5,4.5,7.5], labels=level_labels)
        ms = sub.groupby(bins, observed=True)['Score_Pct'].mean()
        # Always plot against all three levels (NaN where a level is empty).
        # A categorical axis orders labels by first appearance, so passing only
        # the observed levels could scramble Low/Med/High across phases.
        ms.index = ms.index.astype(str)
        ms = ms.reindex(level_labels)
        ax2.plot(level_labels, ms.values, marker='o', lw=2, ms=8, color=pcol[phase],
                 label=PHASE_SHORT[phase])
    ax2.set_xlabel('Confidence Level')
    ax2.set_ylabel('Mean Test Score (%)')
    ax2.legend(title='Phase')
    ax2.set_title('Calibration by Phase')

    fig.suptitle('Confidence Calibration', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'D3_calibration.png')
    plt.close(fig)


# =============================================================================
# E.
# PERSONALITY CORRELATIONS
# =============================================================================

def plot_E1_personality_correlations(merged):
    """Draw a heatmap of Pearson r between Big Five traits and outcomes (plot E1).

    Each cell correlates one trait column (rows, in ``TRAIT_ORDER``) with one
    aggregated outcome column. Cells with 3 or fewer complete observations
    are left as NaN. Significant cells are marked ``*`` (p < .05) or
    ``**`` (p < .01).

    Args:
        merged: Participant-level table, as built in ``main()``, with one
            numeric column per trait and per outcome listed below.

    Side effects:
        Saves ``E1_personality_correlations.png`` into ``PLOT_DIR``.
    """
    outcomes = ['Mean_Score_Gain', 'Mean_Conf_Gain', 'Mean_Total_Gain', 'Mean_Pre_Score',
                'Mean_Post_Score', 'Mean_Pre_Conf', 'Mean_Post_Conf']
    labels = ['Tutor\nScore Gain', 'Tutor\nConf Gain', 'Total\nGain', 'Pre-Tutor\nScore',
              'Post-Tutor\nScore', 'Pre-Tutor\nConf', 'Post-Tutor\nConf']

    # Defaults stand for "not enough data": r = NaN, p = 1 (never starred).
    corr = np.full((len(TRAIT_ORDER), len(outcomes)), np.nan)
    pvals = np.ones_like(corr)
    for i, trait in enumerate(TRAIT_ORDER):
        xv = merged[trait].values
        for j, outcome in enumerate(outcomes):
            yv = merged[outcome].values
            mask = ~(np.isnan(xv) | np.isnan(yv))
            if mask.sum() > 3:
                corr[i, j], pvals[i, j] = stats.pearsonr(xv[mask], yv[mask])

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.heatmap(pd.DataFrame(corr, index=TRAIT_ORDER, columns=labels), annot=True, fmt='.2f',
                cmap='RdBu_r', center=0, vmin=-0.7, vmax=0.7, ax=ax, linewidths=1,
                cbar_kws={'label': 'Pearson r'})
    for (i, j), p in np.ndenumerate(pvals):
        star = '**' if p < .01 else '*' if p < .05 else ''
        if star:
            # Placed in the lower half of the cell so it sits below the annotated r value.
            ax.text(j + 0.5, i + 0.75, star, ha='center', va='center',
                    fontsize=12, fontweight='bold', color='black')
    ax.set_title('Big Five Traits vs Tutoring Outcomes (* p<.05, ** p<.01)',
                 fontsize=13, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'E1_personality_correlations.png')
    plt.close(fig)


def plot_E2_trait_vs_score_gain(merged):
    """Scatter each Big Five trait against the mean tutoring score gain (plot E2).

    One panel per trait in ``TRAIT_ORDER``. Every point is labelled with its
    participant id. When more than 3 complete observations exist, a
    least-squares line and a box with Pearson r / p are added.

    Args:
        merged: Participant-level table with the columns ``Participant``,
            ``Mean_Score_Gain`` and one numeric column per trait.

    Side effects:
        Saves ``E2_trait_vs_score_gain.png`` into ``PLOT_DIR``.
    """
    fig, axes = plt.subplots(1, len(TRAIT_ORDER), figsize=(22, 5), sharey=True, squeeze=False)
    axes = axes[0]
    gains = merged['Mean_Score_Gain'].values
    pids = merged['Participant'].values

    for ax, trait in zip(axes, TRAIT_ORDER):
        color = TRAIT_COLORS[trait]
        xv = merged[trait].values
        mask = ~(np.isnan(xv) | np.isnan(gains))
        x_ok, y_ok = xv[mask], gains[mask]

        ax.scatter(x_ok, y_ok, s=60, color=color, alpha=0.7, edgecolors='white', lw=0.5)
        for pid, x, y in zip(pids[mask], x_ok, y_ok):
            ax.annotate(pid, (x, y), fontsize=7, alpha=0.5,
                        textcoords="offset points", xytext=(3, 3))

        if mask.sum() > 3:
            r, p = stats.pearsonr(x_ok, y_ok)
            fit = np.polyfit(x_ok, y_ok, 1)
            xl = np.linspace(x_ok.min(), x_ok.max(), 100)
            ax.plot(xl, np.poly1d(fit)(xl), color=color, lw=2, alpha=0.6)
            sig = '*' if p < .05 else ''
            ax.text(0.05, 0.95, f'r={r:.2f} p={p:.3f}{sig}', transform=ax.transAxes,
                    fontsize=10, va='top',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        ax.axhline(0, color='gray', lw=0.5, alpha=0.5)
        ax.set_xlabel(trait, fontsize=11, fontweight='bold', color=color)
        ax.set_xlim(1, 7)

    axes[0].set_ylabel('Mean Tutoring Score Gain (%)', fontsize=11)
    fig.suptitle('Big Five Traits vs Tutoring Score Gains', fontsize=14, fontweight='bold')
    fig.tight_layout()
    fig.savefig(PLOT_DIR / 'E2_trait_vs_score_gain.png')
    plt.close(fig)


# =============================================================================
# STATS EXPORT
# =============================================================================

def _start_to_finish_gains(df):
    """Pair each Pre-Reading score with the matching Post-Tutoring score.

    Rows are matched on Participant, Topic and Medium (inner join). Returns a
    frame with those keys plus ``Start``, ``End`` and ``Gain`` (End - Start).
    """
    keys = ['Participant', 'Topic', 'Medium']
    start = df.loc[df['Zeitpunkt'] == 'Pre-Reading', keys + ['Score_Pct']]
    start = start.rename(columns={'Score_Pct': 'Start'})
    end = df.loc[df['Zeitpunkt'] == 'Post-Tutoring', keys + ['Score_Pct']]
    end = end.rename(columns={'Score_Pct': 'End'})
    sf = start.merge(end, on=keys)
    sf['Gain'] = sf['End'] - sf['Start']
    return sf


def _paired_ttest(pre, post):
    """Return ``(t, p)`` of a paired t-test on the complete pairs only.

    Pairs with a missing value on either side are dropped first, so the test
    uses the same observations that ``cohens_d`` effectively uses. Fewer than
    two complete pairs yield ``(nan, nan)``.

    NOTE(review): the test is on ``pre - post``, so a positive gain produces a
    negative t, whereas ``cohens_d`` is computed on ``post - pre``. The sign
    convention is kept as-is because the exported CSVs already use it.
    """
    pairs = pd.DataFrame({'pre': pre, 'post': post}).dropna()
    if len(pairs) < 2:
        return float('nan'), float('nan')
    t_val, p_val = stats.ttest_rel(pairs['pre'], pairs['post'])
    return t_val, p_val


def _score_conf_overview(rows):
    """Mean / SD of score and confidence over all test rows of one group."""
    return {
        'Avg_Score_Mean': rows['Score_Pct'].mean(),
        'Avg_Score_SD': rows['Score_Pct'].std(),
        'Avg_Conf_Mean': rows['Avg_Confidence'].mean(),
        'Avg_Conf_SD': rows['Avg_Confidence'].std(),
    }


def _export_overall_trajectory(df):
    """Write per-phase mean and SEM of score and confidence."""
    phase_means = df.groupby('Zeitpunkt', observed=True).agg(
        Mean_Score=('Score_Pct', 'mean'),
        SEM_Score=('Score_Pct', 'sem'),
        Mean_Confidence=('Avg_Confidence', 'mean'),
        SEM_Confidence=('Avg_Confidence', 'sem'),
    ).reindex(PHASE_ORDER)
    phase_means.index.name = 'Phase'
    phase_means.to_csv(STATS_DIR / 'overall_trajectory.csv', float_format='%.3f')


def _export_start_to_finish(df):
    """Write the one-sample t-test of Pre-Reading -> Post-Tutoring gains against 0."""
    sf = _start_to_finish_gains(df)
    t_sf, p_sf = stats.ttest_1samp(sf['Gain'].dropna(), 0)
    summary = pd.DataFrame([{
        'Metric': 'Pre-Reading to Post-Tutoring',
        'N': len(sf),
        'Gain_Mean': sf['Gain'].mean(),
        'Gain_SD': sf['Gain'].std(),
        't_stat': t_sf,
        'p_value': p_sf,
    }])
    summary.to_csv(STATS_DIR / 'start_to_finish.csv', index=False, float_format='%.3f')


def _export_tutoring_by_medium(df, paired):
    """Write score and confidence tutoring statistics for each medium."""
    med_rows = []
    for m in MEDIUM_ORDER:
        sub = paired[paired['Medium'] == m]
        t_val, p_val = _paired_ttest(sub['Pre_Score'], sub['Post_Score'])
        t_c, p_c = _paired_ttest(sub['Pre_Conf'], sub['Post_Conf'])
        med_rows.append({
            'Medium': m,
            'N_pairs': len(sub),
            **_score_conf_overview(df[df['Medium'] == m]),
            'Pre_Score_Mean': sub['Pre_Score'].mean(),
            'Post_Score_Mean': sub['Post_Score'].mean(),
            'Score_Gain_Mean': sub['Score_Gain'].mean(),
            'Score_Gain_SD': sub['Score_Gain'].std(),
            'Score_Cohens_d': cohens_d(sub['Pre_Score'], sub['Post_Score']),
            'Score_t': t_val,
            'Score_p': p_val,
            'Pre_Conf_Mean': sub['Pre_Conf'].mean(),
            'Post_Conf_Mean': sub['Post_Conf'].mean(),
            'Conf_Gain_Mean': sub['Conf_Gain'].mean(),
            'Conf_Gain_SD': sub['Conf_Gain'].std(),
            'Conf_Cohens_d': cohens_d(sub['Pre_Conf'], sub['Post_Conf']),
            'Conf_t': t_c,
            'Conf_p': p_c,
        })
    pd.DataFrame(med_rows).to_csv(STATS_DIR / 'tutoring_by_medium.csv',
                                  index=False, float_format='%.3f')


def _export_tutoring_by_topic(df, paired):
    """Write score tutoring statistics for each topic found in ``df``."""
    topic_rows = []
    for topic in df['Topic'].unique():
        sub_t = paired[paired['Topic'] == topic]
        t_val, p_val = _paired_ttest(sub_t['Pre_Score'], sub_t['Post_Score'])
        topic_rows.append({
            'Topic': topic,
            'N_pairs': len(sub_t),
            **_score_conf_overview(df[df['Topic'] == topic]),
            'Score_Gain_Mean': sub_t['Score_Gain'].mean(),
            'Score_Gain_SD': sub_t['Score_Gain'].std(),
            'Score_Cohens_d': cohens_d(sub_t['Pre_Score'], sub_t['Post_Score']),
            'Score_t': t_val,
            'Score_p': p_val,
        })
    pd.DataFrame(topic_rows).to_csv(STATS_DIR / 'tutoring_by_topic.csv',
                                    index=False, float_format='%.3f')


def _export_participant_summary(df, paired):
    """Write one row per participant with per-phase mean scores and gains."""
    part_rows = []
    # Participant ids are sorted by the number following their one-character prefix.
    for pid in sorted(df['Participant'].unique(), key=lambda x: int(x[1:])):
        sub_df = df[df['Participant'] == pid]
        sub_p = paired[paired['Participant'] == pid]
        # After the reindex every phase is present; missing phases hold NaN.
        phases = (sub_df.groupby('Zeitpunkt', observed=True)['Score_Pct']
                  .mean().reindex(PHASE_ORDER))
        part_rows.append({
            'Participant': pid,
            'N_Tests': len(sub_df),
            'Avg_Score_Mean': sub_df['Score_Pct'].mean(),
            'Avg_Conf_Mean': sub_df['Avg_Confidence'].mean(),
            'Pre_Reading': phases['Pre-Reading'],
            'Post_Reading': phases['Post-Reading'],
            'Pre_Tutoring': phases['Pre-Tutoring'],
            'Post_Tutoring': phases['Post-Tutoring'],
            'Reading_Gain': phases['Post-Reading'] - phases['Pre-Reading'],
            'Tutoring_Gain': sub_p['Score_Gain'].mean() if len(sub_p) else float('nan'),
        })
    pd.DataFrame(part_rows).to_csv(STATS_DIR / 'participant_summary.csv',
                                   index=False, float_format='%.3f')


def _export_tutoring_by_medium_topic(df, paired):
    """Write mean / SD / SEM of the tutoring score gain per medium x topic cell."""
    nan = float('nan')
    mt_rows = []
    for m in MEDIUM_ORDER:
        for topic in df['Topic'].unique():
            sub = paired[(paired['Medium'] == m) & (paired['Topic'] == topic)]
            has_rows = len(sub) > 0
            mt_rows.append({
                'Medium': m,
                'Topic': topic,
                'N': len(sub),
                'Score_Gain_Mean': sub['Score_Gain'].mean() if has_rows else nan,
                'Score_Gain_SD': sub['Score_Gain'].std() if has_rows else nan,
                'Score_Gain_SEM': sub['Score_Gain'].sem() if has_rows else nan,
            })
    pd.DataFrame(mt_rows).to_csv(STATS_DIR / 'tutoring_by_medium_topic.csv',
                                 index=False, float_format='%.3f')


def export_stats(df, paired):
    """Export the summary statistics behind the plots as CSV files.

    Writes into ``STATS_DIR``: ``overall_trajectory.csv``,
    ``start_to_finish.csv``, ``tutoring_by_medium.csv``,
    ``tutoring_by_topic.csv``, ``participant_summary.csv`` and
    ``tutoring_by_medium_topic.csv``. All floats are written with three
    decimals.

    Args:
        df: Full test-score table as returned by ``load_data()``.
        paired: Pre-/Post-Tutoring pairs as returned by
            ``build_paired_tutoring(df)``.
    """
    _export_overall_trajectory(df)
    _export_start_to_finish(df)
    _export_tutoring_by_medium(df, paired)
    _export_tutoring_by_topic(df, paired)
    _export_participant_summary(df, paired)
    _export_tutoring_by_medium_topic(df, paired)
    print(f" Stats exported to: {STATS_DIR}")


# =============================================================================
# MAIN
# =============================================================================

def _build_personality_table(df, paired, personality):
    """Join per-participant tutoring outcomes with the personality scores.

    Averages the paired tutoring columns per participant, adds the mean
    Pre-Reading -> Post-Tutoring gain as ``Mean_Total_Gain`` (left join, so it
    may be NaN), and keeps only participants present in ``personality``.
    """
    p_agg = paired.groupby('Participant').agg(
        Mean_Score_Gain=('Score_Gain', 'mean'),
        Mean_Conf_Gain=('Conf_Gain', 'mean'),
        Mean_Pre_Score=('Pre_Score', 'mean'),
        Mean_Post_Score=('Post_Score', 'mean'),
        Mean_Pre_Conf=('Pre_Conf', 'mean'),
        Mean_Post_Conf=('Post_Conf', 'mean'),
    ).reset_index()
    total_gain = (_start_to_finish_gains(df)
                  .groupby('Participant')['Gain'].mean()
                  .rename('Mean_Total_Gain')
                  .reset_index())
    p_agg = p_agg.merge(total_gain, on='Participant', how='left')
    return p_agg.merge(personality, on='Participant', how='inner')


def main():
    """Load the data, render every plot section and export the statistics."""
    print("Loading data...")
    df = load_data()
    paired = build_paired_tutoring(df)
    personality = load_personality()
    merged = _build_personality_table(df, paired, personality)

    print(f" {len(df)} test entries, {paired['Participant'].nunique()} participants, "
          f"{len(merged)} with personality data\n")

    # Each entry: (plot code, description, zero-argument callable that renders it).
    sections = [
        ("A. Overall Learning Trajectory", [
            ("A1", "Overall trajectory (score + confidence)", lambda: plot_A1_trajectory(df)),
            ("A2", "Trajectory by medium", lambda: plot_A2_trajectory_by_medium(df)),
            ("A3", "Trajectory by topic", lambda: plot_A3_trajectory_by_topic(df)),
            ("A4", "Participant-level heatmaps", lambda: plot_A4_heatmap(df)),
        ]),
        ("B. Tutoring Phase Deep-Dive", [
            ("B1", "Paired slopes by medium (with stats)", lambda: plot_B1_tutoring_slopes_by_medium(paired)),
            ("B2", "Paired slopes by topic (with stats)", lambda: plot_B2_tutoring_slopes_by_topic(paired)),
            ("B3", "Tutoring gain by medium (effect sizes)", lambda: plot_B3_tutoring_gain_by_medium(paired)),
            ("B4", "Medium x topic interaction", lambda: plot_B4_tutoring_medium_topic(paired)),
            ("B5", "Tutoring effectiveness dashboard", lambda: plot_B5_tutoring_dashboard(paired)),
        ]),
        ("C. Start-to-Finish Gains", [
            ("C1", "Pre-Reading to Post-Tutoring paired", lambda: plot_C1_start_to_finish(df)),
            ("C2", "Learning gains overview", lambda: plot_C2_learning_gains(df)),
        ]),
        ("D. Confidence Analysis", [
            ("D1", "Confidence vs test score scatter", lambda: plot_D1_confidence_vs_score(df)),
            ("D2", "Change in confidence vs change in score", lambda: plot_D2_delta_conf_vs_score(df)),
            ("D3", "Confidence calibration", lambda: plot_D3_calibration(df)),
        ]),
        ("E. Personality Correlations", [
            ("E1", "Big Five vs tutoring outcomes heatmap", lambda: plot_E1_personality_correlations(merged)),
            ("E2", "Trait vs tutoring score gain", lambda: plot_E2_trait_vs_score_gain(merged)),
        ]),
    ]

    n_plots = 0
    for section_name, plots in sections:
        print(f"{section_name}")
        for code, desc, fn in plots:
            fn()
            print(f" [{code}] {desc}")
            n_plots += 1

    # Report the number actually rendered instead of a hard-coded total.
    print(f"\n{n_plots} plots saved to: {PLOT_DIR}")

    print("\nExporting statistics...")
    export_stats(df, paired)
    print("Done.")


if __name__ == "__main__":
    main()