Files
Virtual-Tutor-Eval/generate_plots_questionnaires.py

1165 lines
50 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
generate_plots_questionnaires.py
Analyses all questionnaire data and generates plots:
- IMI (Intrinsic Motivation Inventory) subscales
- SUS (System Usability Scale) scores
- UEQ-S (User Experience Questionnaire Short) pragmatic/hedonic
- NASA-TLX workload subscales
- Godspeed tutor impression
- Social Presence (Legacy, 5 items)
- Cybersickness (tutoring only, 5 items)
- Stress / Readiness / Relaxation (Pre-test items)
- IOS (Inclusion of Other in Self)
- Reading vs Tutoring phase comparisons
- Correlations between questionnaire subscales and learning gains
Output: Data/plots_questionnaires/*.png
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from pathlib import Path
from scipy import stats
# =============================================================================
# PATHS & CONSTANTS
# =============================================================================
BASE = Path(__file__).resolve().parent / "Data"
PLOT_DIR = BASE / "plots_questionnaires"
PLOT_DIR.mkdir(exist_ok=True)
STATS_DIR = BASE / "stats"
STATS_DIR.mkdir(exist_ok=True)
MEDIUMS = ['Chat', 'Video', 'VR']
MED_COLORS = {'Chat': '#42A5F5', 'Video': '#FFA726', 'VR': '#66BB6A'}
sns.set_theme(style="whitegrid", font_scale=1.05)
plt.rcParams['figure.dpi'] = 150
plt.rcParams['savefig.bbox'] = 'tight'
# =============================================================================
# IMI SUBSCALE DEFINITIONS (indices into Post-Questionnaire cols 2-27)
#
# The 26 IMI items span columns 2-27 of both Reading and Tutoring post-
# questionnaires. Column 4 ("Ich fühle mich gestresst") is a stress item
# embedded among the IMI items, treated separately.
#
# IMI subscale mapping: the assignments below follow the actual German item
# text and the standard IMI subscales. All indices are 0-based column
# positions in the file (col 2 = first IMI item).
# =============================================================================
# IMI items are at file columns 2-27 (26 items).
# Col 4 = second "stressed" item → treat as separate relaxed/stress item, not IMI.
# So IMI = 25 items at cols [2,3,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27]
# Standard IMI subscales by column index in the file (0-based):
# Interest/Enjoyment: 7(Spaß), 9(genossen), 13(genoss), 17(interessant), 19(angenehm), 25(spaßvoll), 5(dachte wie sehr genoss)
# → reverse: 14(langweilig)
IMI_INTEREST = {
'items': [5, 7, 9, 13, 17, 19, 25],
'reverse': [14],
'label': 'Interest/\nEnjoyment'
}
# Value/Usefulness: 2(Wert), 6(Konzentration), 8(Verbesserung), 12(wichtig), 15(Lerngewohnheiten), 18(nützlich), 21(Nutzen), 23(Schule), 27(Wert)
IMI_VALUE = {
'items': [2, 6, 8, 12, 15, 18, 21, 23, 27],
'reverse': [],
'label': 'Value/\nUsefulness'
}
# Perceived Choice: 3(Wahlmöglichkeit), 11(weil wollte), 24(Wahl hatte)
# → reverse: 10(keine Wahl), 16(keine andere Wahl), 20(musste), 22(weil musste), 26(nicht eigene Entscheidung)
IMI_CHOICE = {
'items': [3, 11, 24],
'reverse': [10, 16, 20, 22, 26],
'label': 'Perceived\nChoice'
}
IMI_SUBSCALES = [IMI_INTEREST, IMI_VALUE, IMI_CHOICE]
# SUS items: cols 32-41 in Post-Questionnaire-Tutoring (and NOT in Reading)
SUS_COLS = list(range(32, 42)) # 10 items
# UEQ-S items: cols 47-54 in Post-Questionnaire-Tutoring (8 items "Die Interaktion war:")
# In Post-Questionnaire-Reading: cols 28-35 (8 items "Die Erfahrung war:")
UEQS_COLS_TUTORING = list(range(47, 55)) # 8 items
UEQS_COLS_READING = list(range(28, 36)) # 8 items
# UEQ-S: items 1,2,3,4 = pragmatic quality; items 5,6,7,8 = hedonic quality
UEQS_PRAGMATIC = [0, 1, 2, 3] # relative indices within the 8 UEQ items
UEQS_HEDONIC = [4, 5, 6, 7]
# NASA-TLX: 6 items
# In Reading: cols 36-41
# In Tutoring: cols 86-91
NASATLX_COLS_READING = list(range(36, 42))
NASATLX_COLS_TUTORING = list(range(86, 92))
NASATLX_LABELS = ['Mental\nDemand', 'Physical\nDemand', 'Temporal\nDemand',
'Performance', 'Effort', 'Frustration']
# Godspeed: cols 56-79 in Post-Questionnaire-Tutoring (24 items)
GODSPEED_COLS = list(range(56, 80))
# Godspeed subscales (5 subscales, standard order):
# Anthropomorphism (5): items 1-5 → cols 56-60
# Animacy (6): items 6-11 → cols 61-66
# Likeability (5): items 12-16 → cols 67-71 (note: col 67 has trailing space typo in header)
# Perceived Intelligence (5): items 17-21 → cols 72-76
# Perceived Safety (3): items 22-24 → cols 77-79
GODSPEED_SUBSCALES = {
'Anthropo-\nmorphism': list(range(56, 61)),
'Animacy': list(range(61, 67)),
'Like-\nability': list(range(67, 72)),
'Perceived\nIntelligence': list(range(72, 77)),
'Perceived\nSafety': list(range(77, 80)),
}
# Social Presence (Legacy, 5 items): cols 80-84
SOCIAL_PRESENCE_COLS = list(range(80, 85))
SOCIAL_PRESENCE_LABELS = ['Face-to-face', 'Same room', 'Being watched',
'Aware of me', 'Tutor present']
# Cybersickness: cols 42-46 (5 items, tutoring only)
CYBERSICKNESS_COLS = list(range(42, 47))
CYBERSICKNESS_LABELS = ['General\nDiscomfort', 'Fatigue', 'Headache',
'Eye\nStrain', 'Difficulty\nConcentrating']
# IOS (Inclusion of Other in Self): col 28 in tutoring
IOS_COL = 28
# Extra items: cols 29-31
EXTRA_COLS = {'self_use': 29, 'felt_helpful': 30, 'session_length': 31}
# Stress items
STRESS_COL = 1 # "Ich fühle mich gestresst" (both pre/post)
READY_COL = 2 # Pre-test: "Ich fühle mich bereit"
RELAXED_COL = 3 # Pre-test: "Ich fühle mich entspannt"
# Recommend tutor: col 85
RECOMMEND_COL = 85
# BFI-15 cols in Final-Questionnaire: cols 1-15
BFI_COLS = list(range(1, 16))
BFI_TRAITS = {
'Neuroticism': {'items': [1, 2], 'reverse': [3]},
'Extraversion': {'items': [4, 5], 'reverse': [6]},
'Openness': {'items': [7, 8, 9], 'reverse': []},
'Agreeableness': {'items': [11, 12], 'reverse': [10]},
'Conscientiousness': {'items': [13, 15], 'reverse': [14]},
}
# =============================================================================
# DATA LOADING
# =============================================================================
def load_csv(filename):
    """Read a questionnaire CSV from the Data directory.

    Uses the ``utf-8-sig`` encoding so a leading BOM (common in exported
    CSVs) is stripped from the first header. Columns are accessed
    positionally (``iloc``) by the downstream scoring helpers.
    """
    return pd.read_csv(BASE / filename, encoding='utf-8-sig')
def safe_numeric(series):
    """Coerce *series* to numeric values; unparseable entries become NaN."""
    return pd.to_numeric(series, errors='coerce')
def compute_imi(df, subscale, offset=0):
    """Per-row mean of an IMI subscale.

    Parameters
    ----------
    df : DataFrame whose item columns are addressed positionally (iloc).
    subscale : dict with keys
        'items'   -- file column indices scored as-is,
        'reverse' -- file column indices reverse-scored on the 7-point
                     scale (8 - x).
    offset : int
        Shift applied to every index when the item block starts at a
        different position in the file.

    Returns
    -------
    Series of per-row subscale means (NaN-aware: missing items are
    ignored in the mean).
    """
    def item(i):
        # Coerce BEFORE any arithmetic: an object-dtype (string) column
        # would raise TypeError on `8 - series` if reversed first. This
        # also matches how compute_bfi scores its reverse-keyed items.
        return pd.to_numeric(df.iloc[:, i + offset], errors='coerce')

    scored = [item(i) for i in subscale['items']]
    scored += [8 - item(i) for i in subscale['reverse']]
    return pd.concat(scored, axis=1).mean(axis=1)
def compute_sus(df, cols):
    """SUS score (0-100) from the ten usability items at *cols*.

    Standard Brooke scoring on a 1-5 response scale:
      odd-numbered items (1st, 3rd, ...):  contribution = response - 1
      even-numbered items (2nd, 4th, ...): contribution = 5 - response
    The ten contributions are summed and multiplied by 2.5.
    """
    contributions = []
    for position, col in enumerate(cols):
        response = pd.to_numeric(df.iloc[:, col], errors='coerce')
        # even 0-based positions are the odd-numbered SUS items
        contributions.append(response - 1 if position % 2 == 0 else 5 - response)
    return pd.concat(contributions, axis=1).sum(axis=1) * 2.5
def compute_ueqs(df, cols):
    """UEQ-S pragmatic, hedonic and overall quality per row.

    The eight items are answered on a 1-7 scale and re-centered to the
    conventional -3..+3 UEQ range before averaging. Returns a
    (pragmatic, hedonic, overall) tuple of Series.
    """
    responses = pd.concat([safe_numeric(df.iloc[:, c]) for c in cols], axis=1)
    shifted = responses - 4  # 1 -> -3, 4 -> 0, 7 -> +3
    return (
        shifted.iloc[:, UEQS_PRAGMATIC].mean(axis=1),
        shifted.iloc[:, UEQS_HEDONIC].mean(axis=1),
        shifted.mean(axis=1),
    )
def compute_nasatlx(df, cols):
    """NASA-TLX subscale scores and overall workload.

    All six items use a 1-7 scale. The Performance item (4th) is phrased
    so that high = good, so it is reverse-scored (8 - x) to make every
    subscale point in the "more workload" direction. Returns
    (dict of label -> Series, Series of per-row overall means).
    """
    subs = {}
    for idx, (label, col) in enumerate(zip(NASATLX_LABELS, cols)):
        rating = safe_numeric(df.iloc[:, col])
        subs[label] = 8 - rating if idx == 3 else rating
    workload = pd.DataFrame(subs).mean(axis=1)
    return subs, workload
def compute_godspeed(df):
    """Per-row Godspeed subscale means plus an 'Overall' mean of all 24 items."""
    def block_mean(col_indices):
        # Mean across the given item columns, row-wise, NaN-aware.
        frame = pd.concat([safe_numeric(df.iloc[:, c]) for c in col_indices], axis=1)
        return frame.mean(axis=1)

    scores = {name: block_mean(cols) for name, cols in GODSPEED_SUBSCALES.items()}
    scores['Overall'] = block_mean(GODSPEED_COLS)
    return scores
def compute_social_presence(df):
    """Return (item-level DataFrame, per-row mean) for the 5 social-presence items."""
    columns = [safe_numeric(df.iloc[:, c]) for c in SOCIAL_PRESENCE_COLS]
    item_df = pd.concat(columns, axis=1)
    overall = item_df.mean(axis=1)
    return item_df, overall
def compute_bfi(df):
    """BFI-15 trait means per row.

    Reverse-keyed items are scored 8 - x on the 7-point scale before
    averaging with the straight-keyed items of each trait.
    """
    scores = {}
    for trait, spec in BFI_TRAITS.items():
        parts = [safe_numeric(df.iloc[:, i]) for i in spec['items']]
        parts.extend(8 - safe_numeric(df.iloc[:, i]) for i in spec['reverse'])
        scores[trait] = pd.concat(parts, axis=1).mean(axis=1)
    return scores
# =============================================================================
# MAIN
# =============================================================================
def main():
print("Loading questionnaire data...")
# Load all questionnaires
pre_read = load_csv("Pre-Test-Reading.csv")
post_read = load_csv("Post-Questionnaire-Reading.csv")
pre_tutor = load_csv("Pre-Test-Tutoring.csv")
post_tutor = load_csv("Post-Questionnaire-Tutoring.csv")
final = load_csv("Final-Questionnaire.csv")
test_scores = load_csv("test_scores_all.csv")
# Normalize Zeitpunkt in test scores
test_scores['Zeitpunkt'] = test_scores['Zeitpunkt'].str.strip().replace('Pre-Tutor', 'Pre-Tutoring')
# Add Medium/Participant columns as named references
# IMPORTANT: capture values BEFORE adding new columns, because iloc[:, -1]
# shifts when new columns are appended to the DataFrame.
for df in [post_read, post_tutor, pre_read, pre_tutor]:
med_vals = df.iloc[:, -2].values.copy()
part_vals = df.iloc[:, -1].values.copy()
df['Medium_col'] = med_vals
df['Participant_col'] = part_vals
print(f" Pre-Reading: {len(pre_read)} rows")
print(f" Post-Reading: {len(post_read)} rows")
print(f" Pre-Tutoring: {len(pre_tutor)} rows")
print(f" Post-Tutoring:{len(post_tutor)} rows")
print(f" Final: {len(final)} rows")
plot_num = 0
def save(fig, name, desc):
nonlocal plot_num
plot_num += 1
tag = f"Q{plot_num:02d}"
fname = f"{tag}_{name}.png"
fig.savefig(PLOT_DIR / fname)
plt.close(fig)
print(f" [{tag}] {desc}")
# =========================================================================
# Q01: IMI Subscales — Reading vs Tutoring by Medium
# =========================================================================
print("\nA. Intrinsic Motivation (IMI)")
# Compute IMI for reading phase
imi_read = pd.DataFrame({
sub['label']: compute_imi(post_read, sub) for sub in IMI_SUBSCALES
})
imi_read['Medium'] = post_read['Medium_col']
imi_read['Phase'] = 'Reading'
# Compute IMI for tutoring phase
imi_tutor = pd.DataFrame({
sub['label']: compute_imi(post_tutor, sub) for sub in IMI_SUBSCALES
})
imi_tutor['Medium'] = post_tutor['Medium_col']
imi_tutor['Phase'] = 'Tutoring'
imi_all = pd.concat([imi_read, imi_tutor], ignore_index=True)
fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True)
for ax, sub in zip(axes, IMI_SUBSCALES):
label = sub['label']
data_r = imi_all[imi_all['Phase'] == 'Reading']
data_t = imi_all[imi_all['Phase'] == 'Tutoring']
x = np.arange(3)
width = 0.35
means_r = [data_r[data_r['Medium'] == m][label].mean() for m in MEDIUMS]
means_t = [data_t[data_t['Medium'] == m][label].mean() for m in MEDIUMS]
sems_r = [data_r[data_r['Medium'] == m][label].sem() for m in MEDIUMS]
sems_t = [data_t[data_t['Medium'] == m][label].sem() for m in MEDIUMS]
bars_r = ax.bar(x - width/2, means_r, width, yerr=sems_r, capsize=3,
label='Reading', color='#BBDEFB', edgecolor='#1565C0', linewidth=1)
bars_t = ax.bar(x + width/2, means_t, width, yerr=sems_t, capsize=3,
label='Tutoring', color='#C8E6C9', edgecolor='#2E7D32', linewidth=1)
ax.set_xticks(x)
ax.set_xticklabels(MEDIUMS)
ax.set_title(label.replace('\n', ' '), fontsize=12, fontweight='bold')
ax.set_ylim(1, 7)
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
if ax == axes[0]:
ax.set_ylabel('Mean Score (1-7)')
ax.legend(fontsize=9)
fig.suptitle('IMI Subscales: Reading vs Tutoring by Medium', fontsize=14, fontweight='bold')
fig.tight_layout()
save(fig, 'imi_by_medium', 'IMI subscales by medium (Reading vs Tutoring)')
# =========================================================================
# Q02: SUS by Medium (Tutoring only)
# =========================================================================
print("\nB. System Usability (SUS)")
sus_scores = compute_sus(post_tutor, SUS_COLS)
sus_df = pd.DataFrame({'SUS': sus_scores, 'Medium': post_tutor['Medium_col']})
fig, ax = plt.subplots(figsize=(8, 5))
for i, m in enumerate(MEDIUMS):
vals = sus_df[sus_df['Medium'] == m]['SUS'].dropna()
bp = ax.boxplot([vals], positions=[i], widths=0.5, patch_artist=True,
boxprops=dict(facecolor=MED_COLORS[m], alpha=0.6),
medianprops=dict(color='black', linewidth=2))
ax.scatter([i]*len(vals), vals, color=MED_COLORS[m], alpha=0.7, zorder=3, s=40)
ax.text(i, vals.mean() + 2, f'M={vals.mean():.1f}', ha='center', fontsize=10, fontweight='bold')
ax.set_xticks(range(3))
ax.set_xticklabels(MEDIUMS, fontsize=12)
ax.set_ylabel('SUS Score (0-100)')
ax.set_ylim(0, 105)
ax.axhline(68, color='gray', ls='--', alpha=0.5, label='Average threshold (68)')
ax.axhline(80, color='green', ls='--', alpha=0.3, label='Good threshold (80)')
ax.legend(fontsize=9)
ax.set_title('System Usability Scale (SUS) by Medium — Tutoring Phase', fontsize=13, fontweight='bold')
fig.tight_layout()
save(fig, 'sus_by_medium', 'SUS scores by medium')
# =========================================================================
# Q03: UEQ-S — Pragmatic vs Hedonic, Reading vs Tutoring
# =========================================================================
print("\nC. User Experience (UEQ-S)")
# Reading
prag_r, hed_r, overall_r = compute_ueqs(post_read, UEQS_COLS_READING)
ueq_read = pd.DataFrame({'Pragmatic': prag_r, 'Hedonic': hed_r, 'Overall': overall_r,
'Medium': post_read['Medium_col'], 'Phase': 'Reading'})
# Tutoring
prag_t, hed_t, overall_t = compute_ueqs(post_tutor, UEQS_COLS_TUTORING)
ueq_tutor = pd.DataFrame({'Pragmatic': prag_t, 'Hedonic': hed_t, 'Overall': overall_t,
'Medium': post_tutor['Medium_col'], 'Phase': 'Tutoring'})
ueq_all = pd.concat([ueq_read, ueq_tutor], ignore_index=True)
fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True)
for ax, dim in zip(axes, ['Pragmatic', 'Hedonic', 'Overall']):
x = np.arange(3)
width = 0.35
means_r = [ueq_all[(ueq_all['Phase']=='Reading') & (ueq_all['Medium']==m)][dim].mean() for m in MEDIUMS]
means_t = [ueq_all[(ueq_all['Phase']=='Tutoring') & (ueq_all['Medium']==m)][dim].mean() for m in MEDIUMS]
sems_r = [ueq_all[(ueq_all['Phase']=='Reading') & (ueq_all['Medium']==m)][dim].sem() for m in MEDIUMS]
sems_t = [ueq_all[(ueq_all['Phase']=='Tutoring') & (ueq_all['Medium']==m)][dim].sem() for m in MEDIUMS]
ax.bar(x - width/2, means_r, width, yerr=sems_r, capsize=3,
label='Reading', color='#BBDEFB', edgecolor='#1565C0', linewidth=1)
ax.bar(x + width/2, means_t, width, yerr=sems_t, capsize=3,
label='Tutoring', color='#C8E6C9', edgecolor='#2E7D32', linewidth=1)
ax.set_xticks(x)
ax.set_xticklabels(MEDIUMS)
ax.set_title(dim, fontsize=12, fontweight='bold')
ax.set_ylim(-3, 3)
ax.axhline(0, color='black', ls='-', lw=0.5)
ax.axhline(0.8, color='green', ls=':', alpha=0.4, label='Good (>0.8)' if dim == 'Overall' else None)
ax.axhline(-0.8, color='red', ls=':', alpha=0.4, label='Bad (<-0.8)' if dim == 'Overall' else None)
if ax == axes[0]:
ax.set_ylabel('UEQ-S Score (-3 to +3)')
ax.legend(fontsize=8)
if ax == axes[2]:
ax.legend(fontsize=8)
fig.suptitle('UEQ-S: Pragmatic & Hedonic Quality — Reading vs Tutoring', fontsize=14, fontweight='bold')
fig.tight_layout()
save(fig, 'ueqs_by_medium', 'UEQ-S pragmatic/hedonic by medium')
# =========================================================================
# Q04: NASA-TLX — Reading vs Tutoring
# =========================================================================
print("\nD. Workload (NASA-TLX)")
tlx_r_subs, tlx_r_overall = compute_nasatlx(post_read, NASATLX_COLS_READING)
tlx_r = pd.DataFrame(tlx_r_subs)
tlx_r['Overall'] = tlx_r_overall
tlx_r['Medium'] = post_read['Medium_col']
tlx_r['Phase'] = 'Reading'
tlx_t_subs, tlx_t_overall = compute_nasatlx(post_tutor, NASATLX_COLS_TUTORING)
tlx_t = pd.DataFrame(tlx_t_subs)
tlx_t['Overall'] = tlx_t_overall
tlx_t['Medium'] = post_tutor['Medium_col']
tlx_t['Phase'] = 'Tutoring'
tlx_all = pd.concat([tlx_r, tlx_t], ignore_index=True)
# NASA-TLX: direct medium comparison for tutoring phase (grouped bar)
fig, ax = plt.subplots(figsize=(12, 5))
sub_names = NASATLX_LABELS + ['Overall']
x = np.arange(len(sub_names))
width = 0.25
for i, m in enumerate(MEDIUMS):
data_t = tlx_all[(tlx_all['Phase'] == 'Tutoring') & (tlx_all['Medium'] == m)]
means = [data_t[s].mean() for s in sub_names]
sems = [data_t[s].sem() for s in sub_names]
ax.bar(x + (i - 1) * width, means, width, yerr=sems, capsize=2,
label=m, color=MED_COLORS[m], edgecolor='gray', linewidth=0.5)
ax.set_xticks(x)
ax.set_xticklabels(sub_names, fontsize=10)
ax.set_ylim(1, 7)
ax.set_ylabel('Workload Rating (1-7)')
ax.legend(fontsize=10)
ax.set_title('NASA-TLX Workload by Medium — Tutoring Phase', fontsize=13, fontweight='bold')
fig.tight_layout()
save(fig, 'nasatlx_by_medium', 'NASA-TLX by medium (Tutoring)')
# NASA-TLX: Reading vs Tutoring comparison (all mediums combined + per-medium)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Left: Overall workload Reading vs Tutoring per medium
ax = axes[0]
x = np.arange(3)
width = 0.35
for phase_idx, (phase, color, edge) in enumerate([
('Reading', '#BBDEFB', '#1565C0'), ('Tutoring', '#C8E6C9', '#2E7D32')
]):
means = [tlx_all[(tlx_all['Phase']==phase) & (tlx_all['Medium']==m)]['Overall'].mean() for m in MEDIUMS]
sems = [tlx_all[(tlx_all['Phase']==phase) & (tlx_all['Medium']==m)]['Overall'].sem() for m in MEDIUMS]
offset = -width/2 if phase_idx == 0 else width/2
ax.bar(x + offset, means, width, yerr=sems, capsize=3,
label=phase, color=color, edgecolor=edge, linewidth=1)
ax.set_xticks(x)
ax.set_xticklabels(MEDIUMS, fontsize=11)
ax.set_ylim(1, 7)
ax.set_ylabel('Overall Workload (1-7)')
ax.set_title('Overall Workload: Reading vs Tutoring', fontweight='bold')
ax.legend(fontsize=9)
# Right: Per-subscale comparison (Tutoring only, all mediums overlaid)
ax = axes[1]
sub_only = NASATLX_LABELS
x2 = np.arange(len(sub_only))
width2 = 0.25
for i, m in enumerate(MEDIUMS):
data_t = tlx_all[(tlx_all['Phase'] == 'Tutoring') & (tlx_all['Medium'] == m)]
means = [data_t[s].mean() for s in sub_only]
ax.plot(x2, means, 'o-', color=MED_COLORS[m], label=m, linewidth=2, markersize=8)
ax.set_xticks(x2)
ax.set_xticklabels(sub_only, fontsize=8, rotation=20, ha='right')
ax.set_ylim(1, 7)
ax.set_ylabel('Rating (1-7)')
ax.set_title('Subscale Profiles by Medium (Tutoring)', fontweight='bold')
ax.legend(fontsize=9)
fig.suptitle('NASA-TLX Workload Comparison', fontsize=14, fontweight='bold')
fig.tight_layout()
save(fig, 'nasatlx_comparison', 'NASA-TLX reading vs tutoring comparison')
# =========================================================================
# Q05: Godspeed Subscales by Medium (Tutoring only)
# =========================================================================
print("\nE. Tutor Impression (Godspeed)")
godspeed = compute_godspeed(post_tutor)
gs_df = pd.DataFrame(godspeed)
gs_df['Medium'] = post_tutor['Medium_col']
gs_names = list(GODSPEED_SUBSCALES.keys()) + ['Overall']
fig, ax = plt.subplots(figsize=(12, 5))
x = np.arange(len(gs_names))
width = 0.25
for i, m in enumerate(MEDIUMS):
data = gs_df[gs_df['Medium'] == m]
means = [data[s].mean() for s in gs_names]
sems = [data[s].sem() for s in gs_names]
ax.bar(x + (i - 1) * width, means, width, yerr=sems, capsize=2,
label=m, color=MED_COLORS[m], edgecolor='gray', linewidth=0.5)
ax.set_xticks(x)
ax.set_xticklabels(gs_names, fontsize=10)
ax.set_ylim(1, 5)
ax.set_ylabel('Mean Rating (1-5)')
ax.legend()
ax.set_title('Godspeed Tutor Impression by Medium — Tutoring Phase', fontsize=13, fontweight='bold')
fig.tight_layout()
save(fig, 'godspeed_by_medium', 'Godspeed tutor impression by medium')
# =========================================================================
# Q06: Social Presence by Medium (Tutoring only)
# =========================================================================
print("\nF. Social Presence")
sp_items, sp_overall = compute_social_presence(post_tutor)
sp_items.columns = SOCIAL_PRESENCE_LABELS
sp_items['Overall'] = sp_overall
sp_items['Medium'] = post_tutor['Medium_col'].values
fig, ax = plt.subplots(figsize=(12, 5))
sp_names = SOCIAL_PRESENCE_LABELS + ['Overall']
x = np.arange(len(sp_names))
width = 0.25
for i, m in enumerate(MEDIUMS):
data = sp_items[sp_items['Medium'] == m]
means = [data[s].mean() for s in sp_names]
sems = [data[s].sem() for s in sp_names]
ax.bar(x + (i-1)*width, means, width, yerr=sems, capsize=2,
label=m, color=MED_COLORS[m], edgecolor='gray', linewidth=0.5)
ax.set_xticks(x)
ax.set_xticklabels(sp_names, fontsize=9)
ax.set_ylim(1, 5)
ax.set_ylabel('Rating (1-5)')
ax.legend()
ax.set_title('Social Presence (Legacy) by Medium — Tutoring Phase\n(Only filled by participants who wore Meta Quest Pro; N varies by medium)',
fontsize=11, fontweight='bold')
fig.tight_layout()
save(fig, 'social_presence_by_medium', 'Social presence by medium')
# =========================================================================
# Q07: Cybersickness by Medium (VR only, but show all for comparison)
# =========================================================================
print("\nG. Cybersickness")
cyber = pd.DataFrame({
label: safe_numeric(post_tutor.iloc[:, col])
for label, col in zip(CYBERSICKNESS_LABELS, CYBERSICKNESS_COLS)
})
# Cybersickness responses may be exported either as numeric severity ratings
# or as "Ja"/"Nein" strings depending on the form; handle both cases below
# (Ja/ja -> 1 = symptom present, Nein/nein -> 0 = absent).
for label in CYBERSICKNESS_LABELS:
col_data = post_tutor.iloc[:, CYBERSICKNESS_COLS[CYBERSICKNESS_LABELS.index(label)]]
if col_data.dtype == object:
# String data: map Ja=1, Nein=0
cyber[label] = col_data.map({'Ja': 1, 'Nein': 0, 'ja': 1, 'nein': 0})
else:
cyber[label] = safe_numeric(col_data)
cyber['Medium'] = post_tutor['Medium_col'].values
# Check if binary or scale
is_binary = cyber[CYBERSICKNESS_LABELS[0]].dropna().isin([0, 1]).all()
fig, ax = plt.subplots(figsize=(10, 5))
x = np.arange(len(CYBERSICKNESS_LABELS))
width = 0.25
if is_binary:
# Show percentage reporting symptoms
for i, m in enumerate(MEDIUMS):
data = cyber[cyber['Medium'] == m]
pcts = [(data[s] == 1).sum() / len(data) * 100 for s in CYBERSICKNESS_LABELS]
ax.bar(x + (i-1)*width, pcts, width, label=m, color=MED_COLORS[m],
edgecolor='gray', linewidth=0.5)
ax.set_ylabel('% Reporting Symptom')
ax.set_ylim(0, 100)
else:
for i, m in enumerate(MEDIUMS):
data = cyber[cyber['Medium'] == m]
means = [data[s].mean() for s in CYBERSICKNESS_LABELS]
sems = [data[s].sem() for s in CYBERSICKNESS_LABELS]
ax.bar(x + (i-1)*width, means, width, yerr=sems, capsize=2,
label=m, color=MED_COLORS[m], edgecolor='gray', linewidth=0.5)
ax.set_ylabel('Severity Rating')
ax.set_xticks(x)
ax.set_xticklabels(CYBERSICKNESS_LABELS, fontsize=9)
ax.legend()
ax.set_title('Cybersickness Symptoms by Medium — Tutoring Phase', fontsize=13, fontweight='bold')
fig.tight_layout()
save(fig, 'cybersickness_by_medium', 'Cybersickness by medium')
# =========================================================================
# Q08: Stress / Readiness / Relaxation — Pre-Reading vs Pre-Tutoring
# =========================================================================
print("\nH. Pre-Session States")
pre_r_df = pd.DataFrame({
'Stressed': safe_numeric(pre_read.iloc[:, STRESS_COL]),
'Ready': safe_numeric(pre_read.iloc[:, READY_COL]),
'Relaxed': safe_numeric(pre_read.iloc[:, RELAXED_COL]),
'Medium': pre_read['Medium_col'],
'Phase': 'Pre-Reading'
})
pre_t_df = pd.DataFrame({
'Stressed': safe_numeric(pre_tutor.iloc[:, STRESS_COL]),
'Ready': safe_numeric(pre_tutor.iloc[:, READY_COL]),
'Relaxed': safe_numeric(pre_tutor.iloc[:, RELAXED_COL]),
'Medium': pre_tutor['Medium_col'],
'Phase': 'Pre-Tutoring'
})
pre_all = pd.concat([pre_r_df, pre_t_df], ignore_index=True)
fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True)
for ax, item in zip(axes, ['Stressed', 'Ready', 'Relaxed']):
x = np.arange(3)
width = 0.35
means_r = [pre_all[(pre_all['Phase']=='Pre-Reading') & (pre_all['Medium']==m)][item].mean() for m in MEDIUMS]
means_t = [pre_all[(pre_all['Phase']=='Pre-Tutoring') & (pre_all['Medium']==m)][item].mean() for m in MEDIUMS]
sems_r = [pre_all[(pre_all['Phase']=='Pre-Reading') & (pre_all['Medium']==m)][item].sem() for m in MEDIUMS]
sems_t = [pre_all[(pre_all['Phase']=='Pre-Tutoring') & (pre_all['Medium']==m)][item].sem() for m in MEDIUMS]
ax.bar(x - width/2, means_r, width, yerr=sems_r, capsize=3,
label='Pre-Reading', color='#BBDEFB', edgecolor='#1565C0')
ax.bar(x + width/2, means_t, width, yerr=sems_t, capsize=3,
label='Pre-Tutoring', color='#C8E6C9', edgecolor='#2E7D32')
ax.set_xticks(x)
ax.set_xticklabels(MEDIUMS)
ax.set_title(item, fontsize=12, fontweight='bold')
ax.set_ylim(1, 7)
if ax == axes[0]:
ax.set_ylabel('Rating (1-7)')
ax.legend(fontsize=9)
fig.suptitle('Pre-Session State: Reading vs Tutoring by Medium', fontsize=14, fontweight='bold')
fig.tight_layout()
save(fig, 'pre_session_states', 'Pre-session stress/readiness/relaxation')
# =========================================================================
# Q09: IOS (Inclusion of Other in Self) by Medium
# =========================================================================
print("\nI. Additional Measures")
ios_data = pd.DataFrame({
'IOS': safe_numeric(post_tutor.iloc[:, IOS_COL]),
'Medium': post_tutor['Medium_col']
})
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
# IOS
ax = axes[0]
for i, m in enumerate(MEDIUMS):
vals = ios_data[ios_data['Medium'] == m]['IOS'].dropna()
bp = ax.boxplot([vals], positions=[i], widths=0.5, patch_artist=True,
boxprops=dict(facecolor=MED_COLORS[m], alpha=0.6),
medianprops=dict(color='black', linewidth=2))
ax.scatter([i]*len(vals), vals, color=MED_COLORS[m], alpha=0.7, zorder=3, s=30)
ax.text(i, vals.mean() + 0.3, f'M={vals.mean():.2f}', ha='center', fontsize=9, fontweight='bold')
ax.set_xticks(range(3))
ax.set_xticklabels(MEDIUMS)
ax.set_ylim(0.5, 7.5)
ax.set_ylabel('Closeness (1-7)')
ax.set_title('IOS: Closeness to Tutor', fontsize=11, fontweight='bold')
# Self-use willingness
ax = axes[1]
self_use = pd.DataFrame({
'Value': safe_numeric(post_tutor.iloc[:, EXTRA_COLS['self_use']]),
'Medium': post_tutor['Medium_col']
})
for i, m in enumerate(MEDIUMS):
vals = self_use[self_use['Medium'] == m]['Value'].dropna()
bp = ax.boxplot([vals], positions=[i], widths=0.5, patch_artist=True,
boxprops=dict(facecolor=MED_COLORS[m], alpha=0.6),
medianprops=dict(color='black', linewidth=2))
ax.scatter([i]*len(vals), vals, color=MED_COLORS[m], alpha=0.7, zorder=3, s=30)
ax.text(i, vals.mean() + 0.3, f'M={vals.mean():.2f}', ha='center', fontsize=9, fontweight='bold')
ax.set_xticks(range(3))
ax.set_xticklabels(MEDIUMS)
ax.set_ylim(0.5, 7.5)
ax.set_ylabel('Agreement (1-7)')
ax.set_title('Would Use This Method', fontsize=11, fontweight='bold')
# Felt helpful
ax = axes[2]
helpful = pd.DataFrame({
'Value': safe_numeric(post_tutor.iloc[:, EXTRA_COLS['felt_helpful']]),
'Medium': post_tutor['Medium_col']
})
for i, m in enumerate(MEDIUMS):
vals = helpful[helpful['Medium'] == m]['Value'].dropna()
bp = ax.boxplot([vals], positions=[i], widths=0.5, patch_artist=True,
boxprops=dict(facecolor=MED_COLORS[m], alpha=0.6),
medianprops=dict(color='black', linewidth=2))
ax.scatter([i]*len(vals), vals, color=MED_COLORS[m], alpha=0.7, zorder=3, s=30)
ax.text(i, vals.mean() + 0.3, f'M={vals.mean():.2f}', ha='center', fontsize=9, fontweight='bold')
ax.set_xticks(range(3))
ax.set_xticklabels(MEDIUMS)
ax.set_ylim(0.5, 7.5)
ax.set_ylabel('Agreement (1-7)')
ax.set_title('Felt Helpful for Review', fontsize=11, fontweight='bold')
fig.suptitle('Additional Tutoring Measures by Medium', fontsize=14, fontweight='bold')
fig.tight_layout()
save(fig, 'additional_measures', 'IOS, self-use, helpfulness by medium')
# =========================================================================
# Q10: Correlation Heatmap — Questionnaire Subscales vs Learning Gains
# =========================================================================
print("\nJ. Correlations")
# Build per-participant-topic tutoring gain
pre_scores = test_scores[test_scores['Zeitpunkt'] == 'Pre-Tutoring'].set_index(['Participant', 'Topic'])
post_scores = test_scores[test_scores['Zeitpunkt'] == 'Post-Tutoring'].set_index(['Participant', 'Topic'])
common_idx = pre_scores.index.intersection(post_scores.index)
gains = pd.DataFrame({
'Participant': [idx[0] for idx in common_idx],
'Topic': [idx[1] for idx in common_idx],
'Score_Gain': post_scores.loc[common_idx, 'Score_Pct'].astype(float).values - pre_scores.loc[common_idx, 'Score_Pct'].astype(float).values,
'Medium': post_scores.loc[common_idx, 'Medium'].values,
})
# Compute questionnaire scores per row of post_tutor and join with gains
q_scores = pd.DataFrame({
'Participant': post_tutor['Participant_col'].values,
'Medium': post_tutor['Medium_col'].values,
'IMI_Interest': compute_imi(post_tutor, IMI_INTEREST),
'IMI_Value': compute_imi(post_tutor, IMI_VALUE),
'IMI_Choice': compute_imi(post_tutor, IMI_CHOICE),
'SUS': compute_sus(post_tutor, SUS_COLS),
'NASA_TLX': compute_nasatlx(post_tutor, NASATLX_COLS_TUTORING)[1],
'Social_Presence': compute_social_presence(post_tutor)[1],
'IOS': safe_numeric(post_tutor.iloc[:, IOS_COL]),
})
# Add Godspeed overall
gs = compute_godspeed(post_tutor)
q_scores['Godspeed'] = gs['Overall']
# UEQ-S
_, _, ueq_overall_t = compute_ueqs(post_tutor, UEQS_COLS_TUTORING)
q_scores['UEQ_S'] = ueq_overall_t
# Match on participant + medium (since topic info isn't in questionnaire directly,
# we can aggregate by participant to get mean questionnaire scores)
q_agg = q_scores.groupby('Participant').mean(numeric_only=True)
gains_agg = gains.groupby('Participant')['Score_Gain'].mean()
merged = q_agg.join(gains_agg, how='inner')
# Correlation matrix (exclude Social Presence — VR-only, too many NaN)
corr_cols = ['IMI_Interest', 'IMI_Value', 'IMI_Choice', 'SUS', 'UEQ_S',
'NASA_TLX', 'IOS', 'Godspeed', 'Score_Gain']
corr_labels = ['IMI\nInterest', 'IMI\nValue', 'IMI\nChoice', 'SUS', 'UEQ-S',
'NASA-TLX', 'IOS', 'Godspeed', 'Tutoring\nScore Gain']
corr_data = merged[corr_cols]
corr_matrix = corr_data.corr()
# Also compute p-values
n = len(corr_data.dropna())
p_matrix = pd.DataFrame(np.ones((len(corr_cols), len(corr_cols))),
index=corr_cols, columns=corr_cols)
for i_c in range(len(corr_cols)):
for j_c in range(i_c + 1, len(corr_cols)):
valid = corr_data[[corr_cols[i_c], corr_cols[j_c]]].dropna()
if len(valid) >= 3:
r, p = stats.pearsonr(valid.iloc[:, 0], valid.iloc[:, 1])
p_matrix.iloc[i_c, j_c] = p
p_matrix.iloc[j_c, i_c] = p
fig, ax = plt.subplots(figsize=(10, 8))
n_vars = len(corr_cols)
# Relabel the correlation matrix
corr_plot = corr_matrix.copy()
corr_plot.index = corr_labels
corr_plot.columns = corr_labels
# Build annotation with significance stars
annot_strs = []
for i_c in range(n_vars):
row_strs = []
for j_c in range(n_vars):
r = corr_matrix.iloc[i_c, j_c]
if np.isnan(r):
row_strs.append('')
else:
p = p_matrix.iloc[i_c, j_c]
star = ''
if i_c != j_c:
if p < 0.01: star = '**'
elif p < 0.05: star = '*'
row_strs.append(f'{r:.2f}{star}')
annot_strs.append(row_strs)
annot_arr = np.array(annot_strs)
# Lower triangle mask
mask = np.triu(np.ones((n_vars, n_vars), dtype=bool), k=1)
sns.heatmap(corr_plot, mask=mask, annot=annot_arr, fmt='',
cmap='RdBu_r', center=0, vmin=-1, vmax=1,
linewidths=1, linecolor='white',
ax=ax, annot_kws={'fontsize': 10, 'fontweight': 'bold'},
cbar_kws={'shrink': 0.8})
ax.set_title('Questionnaire Subscale Correlations & Learning Gain\n(* p<.05, ** p<.01)',
fontsize=13, fontweight='bold')
fig.tight_layout()
save(fig, 'correlation_heatmap', 'Subscale correlations with learning gain')
# =========================================================================
# Q11: Reading vs Tutoring Phase Comparison Dashboard
# =========================================================================
print("\nK. Phase Comparisons")
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
# IMI Interest: Reading vs Tutoring (overall means)
ax = axes[0, 0]
for sub in IMI_SUBSCALES:
label = sub['label'].replace('\n', ' ')
r_mean = compute_imi(post_read, sub).mean()
t_mean = compute_imi(post_tutor, sub).mean()
r_sem = compute_imi(post_read, sub).sem()
t_sem = compute_imi(post_tutor, sub).sem()
idx = IMI_SUBSCALES.index(sub)
ax.errorbar([0, 1], [r_mean, t_mean], yerr=[r_sem, t_sem],
marker='o', capsize=4, label=label, linewidth=2)
ax.set_xticks([0, 1])
ax.set_xticklabels(['Reading', 'Tutoring'])
ax.set_ylabel('Mean Score (1-7)')
ax.set_ylim(2, 6)
ax.set_title('IMI Subscales', fontweight='bold')
ax.legend(fontsize=8)
# NASA-TLX Overall: Reading vs Tutoring by Medium
ax = axes[0, 1]
for m in MEDIUMS:
r_vals = tlx_all[(tlx_all['Phase']=='Reading') & (tlx_all['Medium']==m)]['Overall']
t_vals = tlx_all[(tlx_all['Phase']=='Tutoring') & (tlx_all['Medium']==m)]['Overall']
ax.errorbar([0, 1], [r_vals.mean(), t_vals.mean()],
yerr=[r_vals.sem(), t_vals.sem()],
marker='o', capsize=4, label=m, color=MED_COLORS[m], linewidth=2)
ax.set_xticks([0, 1])
ax.set_xticklabels(['Reading', 'Tutoring'])
ax.set_ylabel('Workload (1-7)')
ax.set_title('NASA-TLX Overall', fontweight='bold')
ax.legend(fontsize=9)
# UEQ-S Overall: Reading vs Tutoring by Medium
ax = axes[1, 0]
for m in MEDIUMS:
r_vals = ueq_all[(ueq_all['Phase']=='Reading') & (ueq_all['Medium']==m)]['Overall']
t_vals = ueq_all[(ueq_all['Phase']=='Tutoring') & (ueq_all['Medium']==m)]['Overall']
ax.errorbar([0, 1], [r_vals.mean(), t_vals.mean()],
yerr=[r_vals.sem(), t_vals.sem()],
marker='o', capsize=4, label=m, color=MED_COLORS[m], linewidth=2)
ax.set_xticks([0, 1])
ax.set_xticklabels(['Reading', 'Tutoring'])
ax.set_ylabel('UEQ-S Score (-3 to +3)')
ax.set_title('UEQ-S Overall', fontweight='bold')
ax.axhline(0, color='black', ls='-', lw=0.5)
ax.legend(fontsize=9)
# Stress: Pre-Reading vs Pre-Tutoring by Medium
ax = axes[1, 1]
for m in MEDIUMS:
r_vals = pre_all[(pre_all['Phase']=='Pre-Reading') & (pre_all['Medium']==m)]['Stressed']
t_vals = pre_all[(pre_all['Phase']=='Pre-Tutoring') & (pre_all['Medium']==m)]['Stressed']
ax.errorbar([0, 1], [r_vals.mean(), t_vals.mean()],
yerr=[r_vals.sem(), t_vals.sem()],
marker='o', capsize=4, label=m, color=MED_COLORS[m], linewidth=2)
ax.set_xticks([0, 1])
ax.set_xticklabels(['Pre-Reading', 'Pre-Tutoring'])
ax.set_ylabel('Stress Rating (1-7)')
ax.set_title('Pre-Session Stress', fontweight='bold')
ax.legend(fontsize=9)
fig.suptitle('Reading vs Tutoring Phase Comparison', fontsize=15, fontweight='bold')
fig.tight_layout()
save(fig, 'phase_comparison_dashboard', 'Reading vs Tutoring phase comparison dashboard')
# =========================================================================
# Q12: VR-Specific Analysis (Social Presence + Cybersickness + Godspeed)
# =========================================================================
print("\nL. VR-Specific Analysis")
vr_data = post_tutor[post_tutor['Medium_col'] == 'VR']
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
# Social Presence: VR vs others
ax = axes[0]
for i, m in enumerate(MEDIUMS):
data = sp_items[sp_items['Medium'] == m]
means = data['Overall'].mean()
sems = data['Overall'].sem()
color = MED_COLORS[m]
ax.bar(i, means, yerr=sems, capsize=4, color=color, edgecolor='gray',
width=0.6, label=m)
ax.text(i, means + sems + 0.15, f'{means:.2f}', ha='center', fontsize=10, fontweight='bold')
ax.set_xticks(range(3))
ax.set_xticklabels(MEDIUMS)
ax.set_ylabel('Mean Social Presence (1-5)')
ax.set_ylim(1, 5)
ax.set_title('Social Presence', fontweight='bold')
# Cybersickness: VR-specific item detail
ax = axes[1]
if is_binary:
vr_cyber = cyber[cyber['Medium'] == 'VR']
pcts = [(vr_cyber[s] == 1).sum() / len(vr_cyber) * 100 for s in CYBERSICKNESS_LABELS]
colors = ['#EF5350' if p > 30 else '#FFA726' if p > 10 else '#66BB6A' for p in pcts]
bars = ax.bar(range(len(CYBERSICKNESS_LABELS)), pcts, color=colors, edgecolor='gray')
for bar, p in zip(bars, pcts):
ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1.5,
f'{p:.0f}%', ha='center', fontsize=10, fontweight='bold')
ax.set_ylabel('% VR Participants Reporting')
ax.set_ylim(0, 100)
else:
vr_cyber = cyber[cyber['Medium'] == 'VR']
means = [vr_cyber[s].mean() for s in CYBERSICKNESS_LABELS]
sems = [vr_cyber[s].sem() for s in CYBERSICKNESS_LABELS]
ax.bar(range(len(CYBERSICKNESS_LABELS)), means, yerr=sems, capsize=3,
color='#66BB6A', edgecolor='gray')
ax.set_ylabel('Severity')
ax.set_xticks(range(len(CYBERSICKNESS_LABELS)))
ax.set_xticklabels(CYBERSICKNESS_LABELS, fontsize=8)
ax.set_title('VR Cybersickness', fontweight='bold')
# Godspeed: VR vs others
ax = axes[2]
gs_overall_by_m = gs_df.groupby('Medium')['Overall'].agg(['mean', 'sem'])
for i, m in enumerate(MEDIUMS):
mean_v = gs_overall_by_m.loc[m, 'mean']
sem_v = gs_overall_by_m.loc[m, 'sem']
ax.bar(i, mean_v, yerr=sem_v, capsize=4, color=MED_COLORS[m],
edgecolor='gray', width=0.6)
ax.text(i, mean_v + sem_v + 0.05, f'{mean_v:.2f}', ha='center', fontsize=10, fontweight='bold')
ax.set_xticks(range(3))
ax.set_xticklabels(MEDIUMS)
ax.set_ylabel('Mean Godspeed (1-5)')
ax.set_ylim(1, 5)
ax.set_title('Godspeed: Tutor Impression', fontweight='bold')
fig.suptitle('VR-Specific Comparisons', fontsize=14, fontweight='bold')
fig.tight_layout()
save(fig, 'vr_specific', 'VR-specific: social presence, cybersickness, Godspeed')
# =========================================================================
# Export statistics to CSV
# =========================================================================
def stats_by_medium(data_df, value_col, mediums=MEDIUMS):
rows = []
for m in mediums:
vals = data_df[data_df['Medium'] == m][value_col].dropna()
rows.append({
'Medium': m,
'N': len(vals),
'Mean': vals.mean(),
'SD': vals.std(),
'SEM': vals.sem(),
'Median': vals.median(),
'Min': vals.min(),
'Max': vals.max(),
})
return pd.DataFrame(rows)
    # SUS usability scores (tutoring phase), one row per medium.
    stats_by_medium(sus_df, 'SUS').to_csv(
        STATS_DIR / 'questionnaire_sus_by_medium.csv', index=False, float_format='%.3f')
    # IMI subscales (tutoring phase): N/Mean/SD/SEM per subscale x phase x medium.
    imi_rows = []
    for sub in IMI_SUBSCALES:
        label = sub['label'].replace('\n', ' ')
        for m in MEDIUMS:
            vals = imi_tutor[imi_tutor['Medium'] == m][sub['label']].dropna()
            imi_rows.append({'Subscale': label, 'Phase': 'Tutoring', 'Medium': m,
                             'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(), 'SEM': vals.sem()})
        for m in MEDIUMS:
            vals = imi_read[imi_read['Medium'] == m][sub['label']].dropna()
            imi_rows.append({'Subscale': label, 'Phase': 'Reading', 'Medium': m,
                             'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(imi_rows).to_csv(
        STATS_DIR / 'questionnaire_imi_by_medium.csv', index=False, float_format='%.3f')
    # UEQ-S: pragmatic/hedonic/overall per phase x medium.
    ueq_rows = []
    for dim in ['Pragmatic', 'Hedonic', 'Overall']:
        for phase in ['Reading', 'Tutoring']:
            for m in MEDIUMS:
                vals = ueq_all[(ueq_all['Phase'] == phase) & (ueq_all['Medium'] == m)][dim].dropna()
                ueq_rows.append({'Dimension': dim, 'Phase': phase, 'Medium': m,
                                 'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(ueq_rows).to_csv(
        STATS_DIR / 'questionnaire_ueqs_by_medium.csv', index=False, float_format='%.3f')
    # NASA-TLX: every workload subscale plus the Overall composite.
    tlx_rows = []
    sub_names_all = NASATLX_LABELS + ['Overall']
    for phase in ['Reading', 'Tutoring']:
        for m in MEDIUMS:
            data = tlx_all[(tlx_all['Phase'] == phase) & (tlx_all['Medium'] == m)]
            for s in sub_names_all:
                vals = data[s].dropna()
                tlx_rows.append({'Subscale': s.replace('\n', ' '), 'Phase': phase, 'Medium': m,
                                 'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(tlx_rows).to_csv(
        STATS_DIR / 'questionnaire_nasatlx_by_medium.csv', index=False, float_format='%.3f')
    # Godspeed: each subscale plus the Overall composite, per medium.
    gs_rows = []
    for subscale in list(GODSPEED_SUBSCALES.keys()) + ['Overall']:
        for m in MEDIUMS:
            vals = gs_df[gs_df['Medium'] == m][subscale].dropna()
            gs_rows.append({'Subscale': subscale.replace('\n', ' '), 'Medium': m,
                            'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(gs_rows).to_csv(
        STATS_DIR / 'questionnaire_godspeed_by_medium.csv', index=False, float_format='%.3f')
    # Social Presence: each item plus the Overall composite, per medium.
    sp_rows = []
    for item_name in SOCIAL_PRESENCE_LABELS + ['Overall']:
        for m in MEDIUMS:
            vals = sp_items[sp_items['Medium'] == m][item_name].dropna()
            sp_rows.append({'Item': item_name, 'Medium': m,
                            'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(sp_rows).to_csv(
        STATS_DIR / 'questionnaire_social_presence_by_medium.csv', index=False, float_format='%.3f')
    # IOS + extra single-item measures. IOS comes pre-assembled from ios_data;
    # the other two are pulled straight out of the raw tutoring post-questionnaire
    # columns (indices in EXTRA_COLS) and coerced to numeric.
    ios_rows = []
    for col_name, col_key in [('IOS', 'IOS'), ('Self_Use', 'self_use'), ('Felt_Helpful', 'felt_helpful')]:
        if col_name == 'IOS':
            source_df = ios_data.rename(columns={'IOS': col_name})
        else:
            source_df = pd.DataFrame({
                col_name: pd.to_numeric(post_tutor.iloc[:, EXTRA_COLS[col_key]], errors='coerce'),
                'Medium': post_tutor['Medium_col']
            })
        for m in MEDIUMS:
            vals = source_df[source_df['Medium'] == m][col_name].dropna()
            ios_rows.append({'Measure': col_name, 'Medium': m,
                             'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(ios_rows).to_csv(
        STATS_DIR / 'questionnaire_additional_by_medium.csv', index=False, float_format='%.3f')
    # Correlation matrix in long form (one row per ordered variable pair) with
    # p-values and significance markers, for reporting.
    corr_export_rows = []
    for i_c in range(len(corr_cols)):
        for j_c in range(len(corr_cols)):
            corr_export_rows.append({
                'Var1': corr_cols[i_c],
                'Var2': corr_cols[j_c],
                'r': corr_matrix.iloc[i_c, j_c],
                'p': p_matrix.iloc[i_c, j_c],
                'sig': ('**' if p_matrix.iloc[i_c, j_c] < 0.01
                        else '*' if p_matrix.iloc[i_c, j_c] < 0.05 else ''),
            })
    pd.DataFrame(corr_export_rows).to_csv(
        STATS_DIR / 'questionnaire_correlations.csv', index=False, float_format='%.4f')
    # Pre-session affective states (stress/readiness/relaxation) per phase x medium.
    pre_state_rows = []
    for item in ['Stressed', 'Ready', 'Relaxed']:
        for phase in ['Pre-Reading', 'Pre-Tutoring']:
            for m in MEDIUMS:
                vals = pre_all[(pre_all['Phase'] == phase) & (pre_all['Medium'] == m)][item].dropna()
                pre_state_rows.append({'Item': item, 'Phase': phase, 'Medium': m,
                                       'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(pre_state_rows).to_csv(
        STATS_DIR / 'questionnaire_pre_session_states.csv', index=False, float_format='%.3f')
    print(f"\n  Stats exported to: {STATS_DIR}")
    # =========================================================================
    # Print summary statistics (console-only companion to the CSV exports)
    # =========================================================================
    print("\n" + "=" * 70)
    print("QUESTIONNAIRE SUMMARY STATISTICS")
    print("=" * 70)
    # SUS usability per medium (0-100 scale by convention)
    print("\nSUS Scores by Medium:")
    for m in MEDIUMS:
        vals = sus_df[sus_df['Medium'] == m]['SUS'].dropna()
        print(f"  {m}: M={vals.mean():.1f}, SD={vals.std():.1f}, Median={vals.median():.1f}")
    # IMI subscales, tutoring phase only
    print("\nIMI by Medium (Tutoring):")
    for sub in IMI_SUBSCALES:
        label = sub['label'].replace('\n', ' ')
        for m in MEDIUMS:
            vals = imi_tutor[imi_tutor['Medium'] == m][sub['label']].dropna()
            print(f"  {label}/{m}: M={vals.mean():.2f}, SD={vals.std():.2f}")
    # UEQ-S overall, tutoring phase only
    print("\nUEQ-S Overall by Medium (Tutoring):")
    for m in MEDIUMS:
        vals = ueq_tutor[ueq_tutor['Medium'] == m]['Overall'].dropna()
        print(f"  {m}: M={vals.mean():.2f}, SD={vals.std():.2f}")
    # NASA-TLX overall workload, tutoring phase only
    print("\nNASA-TLX Overall by Medium (Tutoring):")
    for m in MEDIUMS:
        vals = tlx_all[(tlx_all['Phase'] == 'Tutoring') & (tlx_all['Medium'] == m)]['Overall'].dropna()
        print(f"  {m}: M={vals.mean():.2f}, SD={vals.std():.2f}")
    # Social presence composite per medium
    print("\nSocial Presence Overall by Medium:")
    for m in MEDIUMS:
        vals = sp_items[sp_items['Medium'] == m]['Overall'].dropna()
        print(f"  {m}: M={vals.mean():.2f}, SD={vals.std():.2f}")
    # Godspeed tutor-impression composite per medium
    print("\nGodspeed Overall by Medium:")
    for m in MEDIUMS:
        vals = gs_df[gs_df['Medium'] == m]['Overall'].dropna()
        print(f"  {m}: M={vals.mean():.2f}, SD={vals.std():.2f}")
    # Inclusion of Other in Self per medium
    print("\nIOS by Medium:")
    for m in MEDIUMS:
        vals = ios_data[ios_data['Medium'] == m]['IOS'].dropna()
        print(f"  {m}: M={vals.mean():.2f}, SD={vals.std():.2f}")
    print(f"\n{plot_num} plots saved to: {PLOT_DIR}")
# Script entry point: run the full questionnaire analysis and plotting pipeline.
if __name__ == "__main__":
    main()