""" generate_plots_questionnaires.py Analyses all questionnaire data and generates plots: - IMI (Intrinsic Motivation Inventory) subscales - SUS (System Usability Scale) scores - UEQ-S (User Experience Questionnaire – Short) pragmatic/hedonic - NASA-TLX workload subscales - Godspeed tutor impression - Social Presence (Legacy, 5 items) - Cybersickness (tutoring only, 5 items) - Stress / Readiness / Relaxation (Pre-test items) - IOS (Inclusion of Other in Self) - Reading vs Tutoring phase comparisons - Correlations between questionnaire subscales and learning gains Output: Data/plots_questionnaires/*.png """ import pandas as pd import numpy as np import matplotlib.pyplot as plt import matplotlib.ticker as ticker import seaborn as sns from pathlib import Path from scipy import stats # ============================================================================= # PATHS & CONSTANTS # ============================================================================= BASE = Path(r"F:\GitHub Projekte\VirTu-Eval\Data") PLOT_DIR = BASE / "plots_questionnaires" PLOT_DIR.mkdir(exist_ok=True) STATS_DIR = BASE / "stats" STATS_DIR.mkdir(exist_ok=True) MEDIUMS = ['Chat', 'Video', 'VR'] MED_COLORS = {'Chat': '#42A5F5', 'Video': '#FFA726', 'VR': '#66BB6A'} sns.set_theme(style="whitegrid", font_scale=1.05) plt.rcParams['figure.dpi'] = 150 plt.rcParams['savefig.bbox'] = 'tight' # ============================================================================= # IMI SUBSCALE DEFINITIONS (indices into Post-Questionnaire cols 2-27) # # The 26 IMI items span columns 2-27 of both Reading and Tutoring post- # questionnaires. Column 4 ("Ich fühle mich gestresst") is a stress item # embedded among the IMI items, treated separately. # # IMI subscale mapping (1-indexed within IMI block, i.e. 
col 2 = item 1): # Interest/Enjoyment: items 4,6,8,10,12,14,16 (cols 5,7,9,11,13,15,17 - 0-indexed from file but relative to block) # Value/Usefulness: items 1,3,5,7,9,11,13 (cols 2,4,6,8,10,12,14 - but col 4 is stress, skip) # Perceived Choice: items 2,15,17,19,21,23,25 (cols 3,16,18,20,22,24,26) # # Actually, let me map by the actual German text and standard IMI subscales: # ============================================================================= # IMI items are at file columns 2-27 (26 items). # Col 4 = second "stressed" item → treat as separate relaxed/stress item, not IMI. # So IMI = 25 items at cols [2,3,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27] # Standard IMI subscales by column index in the file (0-based): # Interest/Enjoyment: 7(Spaß), 9(genossen), 13(genoss), 17(interessant), 19(angenehm), 25(spaßvoll), 5(dachte wie sehr genoss) # → reverse: 14(langweilig) IMI_INTEREST = { 'items': [5, 7, 9, 13, 17, 19, 25], 'reverse': [14], 'label': 'Interest/\nEnjoyment' } # Value/Usefulness: 2(Wert), 6(Konzentration), 8(Verbesserung), 12(wichtig), 15(Lerngewohnheiten), 18(nützlich), 21(Nutzen), 23(Schule), 27(Wert) IMI_VALUE = { 'items': [2, 6, 8, 12, 15, 18, 21, 23, 27], 'reverse': [], 'label': 'Value/\nUsefulness' } # Perceived Choice: 3(Wahlmöglichkeit), 11(weil wollte), 24(Wahl hatte) # → reverse: 10(keine Wahl), 16(keine andere Wahl), 20(musste), 22(weil musste), 26(nicht eigene Entscheidung) IMI_CHOICE = { 'items': [3, 11, 24], 'reverse': [10, 16, 20, 22, 26], 'label': 'Perceived\nChoice' } IMI_SUBSCALES = [IMI_INTEREST, IMI_VALUE, IMI_CHOICE] # SUS items: cols 32-41 in Post-Questionnaire-Tutoring (and NOT in Reading) SUS_COLS = list(range(32, 42)) # 10 items # UEQ-S items: cols 47-54 in Post-Questionnaire-Tutoring (8 items "Die Interaktion war:") # In Post-Questionnaire-Reading: cols 28-35 (8 items "Die Erfahrung war:") UEQS_COLS_TUTORING = list(range(47, 55)) # 8 items UEQS_COLS_READING = list(range(28, 36)) # 8 items # UEQ-S: items 
1,2,3,4 = pragmatic quality; items 5,6,7,8 = hedonic quality UEQS_PRAGMATIC = [0, 1, 2, 3] # relative indices within the 8 UEQ items UEQS_HEDONIC = [4, 5, 6, 7] # NASA-TLX: 6 items # In Reading: cols 36-41 # In Tutoring: cols 86-91 NASATLX_COLS_READING = list(range(36, 42)) NASATLX_COLS_TUTORING = list(range(86, 92)) NASATLX_LABELS = ['Mental\nDemand', 'Physical\nDemand', 'Temporal\nDemand', 'Performance', 'Effort', 'Frustration'] # Godspeed: cols 56-79 in Post-Questionnaire-Tutoring (24 items) GODSPEED_COLS = list(range(56, 80)) # Godspeed subscales (5 subscales, standard order): # Anthropomorphism (5): items 1-5 → cols 56-60 # Animacy (6): items 6-11 → cols 61-66 # Likeability (5): items 12-16 → cols 67-71 (note: col 67 has trailing space typo in header) # Perceived Intelligence (5): items 17-21 → cols 72-76 # Perceived Safety (3): items 22-24 → cols 77-79 GODSPEED_SUBSCALES = { 'Anthropo-\nmorphism': list(range(56, 61)), 'Animacy': list(range(61, 67)), 'Like-\nability': list(range(67, 72)), 'Perceived\nIntelligence': list(range(72, 77)), 'Perceived\nSafety': list(range(77, 80)), } # Social Presence (Legacy, 5 items): cols 80-84 SOCIAL_PRESENCE_COLS = list(range(80, 85)) SOCIAL_PRESENCE_LABELS = ['Face-to-face', 'Same room', 'Being watched', 'Aware of me', 'Tutor present'] # Cybersickness: cols 42-46 (5 items, tutoring only) CYBERSICKNESS_COLS = list(range(42, 47)) CYBERSICKNESS_LABELS = ['General\nDiscomfort', 'Fatigue', 'Headache', 'Eye\nStrain', 'Difficulty\nConcentrating'] # IOS (Inclusion of Other in Self): col 28 in tutoring IOS_COL = 28 # Extra items: cols 29-31 EXTRA_COLS = {'self_use': 29, 'felt_helpful': 30, 'session_length': 31} # Stress items STRESS_COL = 1 # "Ich fühle mich gestresst" (both pre/post) READY_COL = 2 # Pre-test: "Ich fühle mich bereit" RELAXED_COL = 3 # Pre-test: "Ich fühle mich entspannt" # Recommend tutor: col 85 RECOMMEND_COL = 85 # BFI-15 cols in Final-Questionnaire: cols 1-15 BFI_COLS = list(range(1, 16)) BFI_TRAITS = { 
'Neuroticism': {'items': [1, 2], 'reverse': [3]},
'Extraversion': {'items': [4, 5], 'reverse': [6]},
'Openness': {'items': [7, 8, 9], 'reverse': []},
'Agreeableness': {'items': [11, 12], 'reverse': [10]},
'Conscientiousness': {'items': [13, 15], 'reverse': [14]},
}

# =============================================================================
# DATA LOADING
# =============================================================================

def load_csv(filename: str) -> pd.DataFrame:
    """Load a CSV from BASE; columns are accessed positionally (iloc)."""
    path = BASE / filename
    # utf-8-sig strips a possible BOM from exported questionnaire files
    df = pd.read_csv(path, encoding='utf-8-sig')
    return df

def safe_numeric(series: "pd.Series") -> "pd.Series":
    """Convert a series to numeric, coercing unparseable values to NaN."""
    return pd.to_numeric(series, errors='coerce')

def compute_imi(df: pd.DataFrame, subscale: dict, offset: int = 0) -> "pd.Series":
    """Compute the per-row mean of one IMI subscale.

    subscale: dict with 'items' and 'reverse' lists of 0-based column indices.
    offset:   shift all indices if the columns start at a different position.
    """
    items = [df.iloc[:, i + offset] for i in subscale['items']]
    # 7-point scale: reverse-scored item = 8 - x
    # NOTE(review): reversal happens BEFORE safe_numeric, so it assumes the
    # reverse-item columns are already numeric — confirm against the CSVs.
    reverse = [8 - df.iloc[:, i + offset] for i in subscale['reverse']]
    all_items = [safe_numeric(s) for s in items + reverse]
    return pd.concat(all_items, axis=1).mean(axis=1)

def compute_sus(df: pd.DataFrame, cols: list) -> "pd.Series":
    """Compute the SUS score (0-100) from 10 items at the given column indices.

    Standard SUS scoring:
      odd items (1,3,5,7,9):   score = response - 1
      even items (2,4,6,8,10): score = 5 - response
      SUS = sum of item scores * 2.5
    """
    scores = []
    for i, col_idx in enumerate(cols):
        val = safe_numeric(df.iloc[:, col_idx])
        if i % 2 == 0:  # odd items (0-indexed even)
            scores.append(val - 1)
        else:  # even items (0-indexed odd)
            scores.append(5 - val)
    return pd.concat(scores, axis=1).sum(axis=1) * 2.5

def compute_ueqs(df: pd.DataFrame, cols: list):
    """Compute UEQ-S pragmatic, hedonic, and overall quality.

    Items are on a 1-7 scale and are centered to -3..+3.
    Returns (pragmatic, hedonic, overall) as Series.
    """
    vals = pd.concat([safe_numeric(df.iloc[:, c]) for c in cols], axis=1)
    centered = vals - 4  # center: 1→-3, 4→0, 7→3
    pragmatic = centered.iloc[:, UEQS_PRAGMATIC].mean(axis=1)
    hedonic = centered.iloc[:, UEQS_HEDONIC].mean(axis=1)
    overall = centered.mean(axis=1)
    return pragmatic, hedonic, overall

def compute_nasatlx(df: pd.DataFrame, cols: list):
    """Compute NASA-TLX subscales and overall workload.

    All items are on a 1-7 scale. Item 4 (Performance/success) is inverted
    (high raw value = good performance), so it is reversed here so that a
    high score consistently means high workload.
    Returns (dict of subscale Series, overall Series).
    """
    subs = {}
    for i, label in enumerate(NASATLX_LABELS):
        val = safe_numeric(df.iloc[:, cols[i]])
        if i == 3:  # Performance — reverse so high = high workload
            subs[label] = 8 - val
        else:
            subs[label] = val
    overall = pd.DataFrame(subs).mean(axis=1)
    return subs, overall

def compute_godspeed(df: pd.DataFrame) -> dict:
    """Compute Godspeed subscale means (plus 'Overall' across all 24 items)."""
    result = {}
    for name, cols in GODSPEED_SUBSCALES.items():
        vals = pd.concat([safe_numeric(df.iloc[:, c]) for c in cols], axis=1)
        result[name] = vals.mean(axis=1)
    result['Overall'] = pd.concat(
        [safe_numeric(df.iloc[:, c]) for c in GODSPEED_COLS], axis=1
    ).mean(axis=1)
    return result

def compute_social_presence(df: pd.DataFrame):
    """Compute social presence from the 5 legacy items.

    Returns (item-level DataFrame, per-row mean Series).
    """
    vals = pd.concat(
        [safe_numeric(df.iloc[:, c]) for c in SOCIAL_PRESENCE_COLS], axis=1
    )
    return vals, vals.mean(axis=1)

def compute_bfi(df: pd.DataFrame) -> dict:
    """Compute BFI-15 trait mean scores.

    NOTE(review): reverse-coding uses 8 - x, which assumes a 7-point response
    scale (as in the SOEP BFI-15 variant) — confirm against the questionnaire.
    """
    traits = {}
    for trait, spec in BFI_TRAITS.items():
        items = [safe_numeric(df.iloc[:, i]) for i in spec['items']]
        reverse = [8 - safe_numeric(df.iloc[:, i]) for i in spec['reverse']]
        all_items = items + reverse
        traits[trait] = pd.concat(all_items, axis=1).mean(axis=1)
    return traits

# =============================================================================
# MAIN
# =============================================================================

def main():
    print("Loading questionnaire data...")
    # Load all questionnaires
    pre_read = load_csv("Pre-Test-Reading.csv")
    post_read = \
load_csv("Post-Questionnaire-Reading.csv") pre_tutor = load_csv("Pre-Test-Tutoring.csv") post_tutor = load_csv("Post-Questionnaire-Tutoring.csv") final = load_csv("Final-Questionnaire.csv") test_scores = load_csv("test_scores_all.csv") # Normalize Zeitpunkt in test scores test_scores['Zeitpunkt'] = test_scores['Zeitpunkt'].str.strip().replace('Pre-Tutor', 'Pre-Tutoring') # Add Medium/Participant columns as named references # IMPORTANT: capture values BEFORE adding new columns, because iloc[:, -1] # shifts when new columns are appended to the DataFrame. for df in [post_read, post_tutor, pre_read, pre_tutor]: med_vals = df.iloc[:, -2].values.copy() part_vals = df.iloc[:, -1].values.copy() df['Medium_col'] = med_vals df['Participant_col'] = part_vals print(f" Pre-Reading: {len(pre_read)} rows") print(f" Post-Reading: {len(post_read)} rows") print(f" Pre-Tutoring: {len(pre_tutor)} rows") print(f" Post-Tutoring:{len(post_tutor)} rows") print(f" Final: {len(final)} rows") plot_num = 0 def save(fig, name, desc): nonlocal plot_num plot_num += 1 tag = f"Q{plot_num:02d}" fname = f"{tag}_{name}.png" fig.savefig(PLOT_DIR / fname) plt.close(fig) print(f" [{tag}] {desc}") # ========================================================================= # Q01: IMI Subscales — Reading vs Tutoring by Medium # ========================================================================= print("\nA. 
Intrinsic Motivation (IMI)") # Compute IMI for reading phase imi_read = pd.DataFrame({ sub['label']: compute_imi(post_read, sub) for sub in IMI_SUBSCALES }) imi_read['Medium'] = post_read['Medium_col'] imi_read['Phase'] = 'Reading' # Compute IMI for tutoring phase imi_tutor = pd.DataFrame({ sub['label']: compute_imi(post_tutor, sub) for sub in IMI_SUBSCALES }) imi_tutor['Medium'] = post_tutor['Medium_col'] imi_tutor['Phase'] = 'Tutoring' imi_all = pd.concat([imi_read, imi_tutor], ignore_index=True) fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True) for ax, sub in zip(axes, IMI_SUBSCALES): label = sub['label'] data_r = imi_all[imi_all['Phase'] == 'Reading'] data_t = imi_all[imi_all['Phase'] == 'Tutoring'] x = np.arange(3) width = 0.35 means_r = [data_r[data_r['Medium'] == m][label].mean() for m in MEDIUMS] means_t = [data_t[data_t['Medium'] == m][label].mean() for m in MEDIUMS] sems_r = [data_r[data_r['Medium'] == m][label].sem() for m in MEDIUMS] sems_t = [data_t[data_t['Medium'] == m][label].sem() for m in MEDIUMS] bars_r = ax.bar(x - width/2, means_r, width, yerr=sems_r, capsize=3, label='Reading', color='#BBDEFB', edgecolor='#1565C0', linewidth=1) bars_t = ax.bar(x + width/2, means_t, width, yerr=sems_t, capsize=3, label='Tutoring', color='#C8E6C9', edgecolor='#2E7D32', linewidth=1) ax.set_xticks(x) ax.set_xticklabels(MEDIUMS) ax.set_title(label.replace('\n', ' '), fontsize=12, fontweight='bold') ax.set_ylim(1, 7) ax.yaxis.set_major_locator(ticker.MultipleLocator(1)) if ax == axes[0]: ax.set_ylabel('Mean Score (1-7)') ax.legend(fontsize=9) fig.suptitle('IMI Subscales: Reading vs Tutoring by Medium', fontsize=14, fontweight='bold') fig.tight_layout() save(fig, 'imi_by_medium', 'IMI subscales by medium (Reading vs Tutoring)') # ========================================================================= # Q02: SUS by Medium (Tutoring only) # ========================================================================= print("\nB. 
System Usability (SUS)") sus_scores = compute_sus(post_tutor, SUS_COLS) sus_df = pd.DataFrame({'SUS': sus_scores, 'Medium': post_tutor['Medium_col']}) fig, ax = plt.subplots(figsize=(8, 5)) for i, m in enumerate(MEDIUMS): vals = sus_df[sus_df['Medium'] == m]['SUS'].dropna() bp = ax.boxplot([vals], positions=[i], widths=0.5, patch_artist=True, boxprops=dict(facecolor=MED_COLORS[m], alpha=0.6), medianprops=dict(color='black', linewidth=2)) ax.scatter([i]*len(vals), vals, color=MED_COLORS[m], alpha=0.7, zorder=3, s=40) ax.text(i, vals.mean() + 2, f'M={vals.mean():.1f}', ha='center', fontsize=10, fontweight='bold') ax.set_xticks(range(3)) ax.set_xticklabels(MEDIUMS, fontsize=12) ax.set_ylabel('SUS Score (0-100)') ax.set_ylim(0, 105) ax.axhline(68, color='gray', ls='--', alpha=0.5, label='Average threshold (68)') ax.axhline(80, color='green', ls='--', alpha=0.3, label='Good threshold (80)') ax.legend(fontsize=9) ax.set_title('System Usability Scale (SUS) by Medium — Tutoring Phase', fontsize=13, fontweight='bold') fig.tight_layout() save(fig, 'sus_by_medium', 'SUS scores by medium') # ========================================================================= # Q03: UEQ-S — Pragmatic vs Hedonic, Reading vs Tutoring # ========================================================================= print("\nC. 
User Experience (UEQ-S)") # Reading prag_r, hed_r, overall_r = compute_ueqs(post_read, UEQS_COLS_READING) ueq_read = pd.DataFrame({'Pragmatic': prag_r, 'Hedonic': hed_r, 'Overall': overall_r, 'Medium': post_read['Medium_col'], 'Phase': 'Reading'}) # Tutoring prag_t, hed_t, overall_t = compute_ueqs(post_tutor, UEQS_COLS_TUTORING) ueq_tutor = pd.DataFrame({'Pragmatic': prag_t, 'Hedonic': hed_t, 'Overall': overall_t, 'Medium': post_tutor['Medium_col'], 'Phase': 'Tutoring'}) ueq_all = pd.concat([ueq_read, ueq_tutor], ignore_index=True) fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True) for ax, dim in zip(axes, ['Pragmatic', 'Hedonic', 'Overall']): x = np.arange(3) width = 0.35 means_r = [ueq_all[(ueq_all['Phase']=='Reading') & (ueq_all['Medium']==m)][dim].mean() for m in MEDIUMS] means_t = [ueq_all[(ueq_all['Phase']=='Tutoring') & (ueq_all['Medium']==m)][dim].mean() for m in MEDIUMS] sems_r = [ueq_all[(ueq_all['Phase']=='Reading') & (ueq_all['Medium']==m)][dim].sem() for m in MEDIUMS] sems_t = [ueq_all[(ueq_all['Phase']=='Tutoring') & (ueq_all['Medium']==m)][dim].sem() for m in MEDIUMS] ax.bar(x - width/2, means_r, width, yerr=sems_r, capsize=3, label='Reading', color='#BBDEFB', edgecolor='#1565C0', linewidth=1) ax.bar(x + width/2, means_t, width, yerr=sems_t, capsize=3, label='Tutoring', color='#C8E6C9', edgecolor='#2E7D32', linewidth=1) ax.set_xticks(x) ax.set_xticklabels(MEDIUMS) ax.set_title(dim, fontsize=12, fontweight='bold') ax.set_ylim(-3, 3) ax.axhline(0, color='black', ls='-', lw=0.5) ax.axhline(0.8, color='green', ls=':', alpha=0.4, label='Good (>0.8)' if dim == 'Overall' else None) ax.axhline(-0.8, color='red', ls=':', alpha=0.4, label='Bad (<-0.8)' if dim == 'Overall' else None) if ax == axes[0]: ax.set_ylabel('UEQ-S Score (-3 to +3)') ax.legend(fontsize=8) if ax == axes[2]: ax.legend(fontsize=8) fig.suptitle('UEQ-S: Pragmatic & Hedonic Quality — Reading vs Tutoring', fontsize=14, fontweight='bold') fig.tight_layout() save(fig, 'ueqs_by_medium', 
'UEQ-S pragmatic/hedonic by medium') # ========================================================================= # Q04: NASA-TLX — Reading vs Tutoring # ========================================================================= print("\nD. Workload (NASA-TLX)") tlx_r_subs, tlx_r_overall = compute_nasatlx(post_read, NASATLX_COLS_READING) tlx_r = pd.DataFrame(tlx_r_subs) tlx_r['Overall'] = tlx_r_overall tlx_r['Medium'] = post_read['Medium_col'] tlx_r['Phase'] = 'Reading' tlx_t_subs, tlx_t_overall = compute_nasatlx(post_tutor, NASATLX_COLS_TUTORING) tlx_t = pd.DataFrame(tlx_t_subs) tlx_t['Overall'] = tlx_t_overall tlx_t['Medium'] = post_tutor['Medium_col'] tlx_t['Phase'] = 'Tutoring' tlx_all = pd.concat([tlx_r, tlx_t], ignore_index=True) # NASA-TLX: direct medium comparison for tutoring phase (grouped bar) fig, ax = plt.subplots(figsize=(12, 5)) sub_names = NASATLX_LABELS + ['Overall'] x = np.arange(len(sub_names)) width = 0.25 for i, m in enumerate(MEDIUMS): data_t = tlx_all[(tlx_all['Phase'] == 'Tutoring') & (tlx_all['Medium'] == m)] means = [data_t[s].mean() for s in sub_names] sems = [data_t[s].sem() for s in sub_names] ax.bar(x + (i - 1) * width, means, width, yerr=sems, capsize=2, label=m, color=MED_COLORS[m], edgecolor='gray', linewidth=0.5) ax.set_xticks(x) ax.set_xticklabels(sub_names, fontsize=10) ax.set_ylim(1, 7) ax.set_ylabel('Workload Rating (1-7)') ax.legend(fontsize=10) ax.set_title('NASA-TLX Workload by Medium — Tutoring Phase', fontsize=13, fontweight='bold') fig.tight_layout() save(fig, 'nasatlx_by_medium', 'NASA-TLX by medium (Tutoring)') # NASA-TLX: Reading vs Tutoring comparison (all mediums combined + per-medium) fig, axes = plt.subplots(1, 2, figsize=(14, 5)) # Left: Overall workload Reading vs Tutoring per medium ax = axes[0] x = np.arange(3) width = 0.35 for phase_idx, (phase, color, edge) in enumerate([ ('Reading', '#BBDEFB', '#1565C0'), ('Tutoring', '#C8E6C9', '#2E7D32') ]): means = [tlx_all[(tlx_all['Phase']==phase) & 
(tlx_all['Medium']==m)]['Overall'].mean() for m in MEDIUMS] sems = [tlx_all[(tlx_all['Phase']==phase) & (tlx_all['Medium']==m)]['Overall'].sem() for m in MEDIUMS] offset = -width/2 if phase_idx == 0 else width/2 ax.bar(x + offset, means, width, yerr=sems, capsize=3, label=phase, color=color, edgecolor=edge, linewidth=1) ax.set_xticks(x) ax.set_xticklabels(MEDIUMS, fontsize=11) ax.set_ylim(1, 7) ax.set_ylabel('Overall Workload (1-7)') ax.set_title('Overall Workload: Reading vs Tutoring', fontweight='bold') ax.legend(fontsize=9) # Right: Per-subscale comparison (Tutoring only, all mediums overlaid) ax = axes[1] sub_only = NASATLX_LABELS x2 = np.arange(len(sub_only)) width2 = 0.25 for i, m in enumerate(MEDIUMS): data_t = tlx_all[(tlx_all['Phase'] == 'Tutoring') & (tlx_all['Medium'] == m)] means = [data_t[s].mean() for s in sub_only] ax.plot(x2, means, 'o-', color=MED_COLORS[m], label=m, linewidth=2, markersize=8) ax.set_xticks(x2) ax.set_xticklabels(sub_only, fontsize=8, rotation=20, ha='right') ax.set_ylim(1, 7) ax.set_ylabel('Rating (1-7)') ax.set_title('Subscale Profiles by Medium (Tutoring)', fontweight='bold') ax.legend(fontsize=9) fig.suptitle('NASA-TLX Workload Comparison', fontsize=14, fontweight='bold') fig.tight_layout() save(fig, 'nasatlx_comparison', 'NASA-TLX reading vs tutoring comparison') # ========================================================================= # Q05: Godspeed Subscales by Medium (Tutoring only) # ========================================================================= print("\nE. 
Tutor Impression (Godspeed)") godspeed = compute_godspeed(post_tutor) gs_df = pd.DataFrame(godspeed) gs_df['Medium'] = post_tutor['Medium_col'] gs_names = list(GODSPEED_SUBSCALES.keys()) + ['Overall'] fig, ax = plt.subplots(figsize=(12, 5)) x = np.arange(len(gs_names)) width = 0.25 for i, m in enumerate(MEDIUMS): data = gs_df[gs_df['Medium'] == m] means = [data[s].mean() for s in gs_names] sems = [data[s].sem() for s in gs_names] ax.bar(x + (i - 1) * width, means, width, yerr=sems, capsize=2, label=m, color=MED_COLORS[m], edgecolor='gray', linewidth=0.5) ax.set_xticks(x) ax.set_xticklabels(gs_names, fontsize=10) ax.set_ylim(1, 5) ax.set_ylabel('Mean Rating (1-5)') ax.legend() ax.set_title('Godspeed Tutor Impression by Medium — Tutoring Phase', fontsize=13, fontweight='bold') fig.tight_layout() save(fig, 'godspeed_by_medium', 'Godspeed tutor impression by medium') # ========================================================================= # Q06: Social Presence by Medium (Tutoring only) # ========================================================================= print("\nF. 
Social Presence") sp_items, sp_overall = compute_social_presence(post_tutor) sp_items.columns = SOCIAL_PRESENCE_LABELS sp_items['Overall'] = sp_overall sp_items['Medium'] = post_tutor['Medium_col'].values fig, ax = plt.subplots(figsize=(12, 5)) sp_names = SOCIAL_PRESENCE_LABELS + ['Overall'] x = np.arange(len(sp_names)) width = 0.25 for i, m in enumerate(MEDIUMS): data = sp_items[sp_items['Medium'] == m] means = [data[s].mean() for s in sp_names] sems = [data[s].sem() for s in sp_names] ax.bar(x + (i-1)*width, means, width, yerr=sems, capsize=2, label=m, color=MED_COLORS[m], edgecolor='gray', linewidth=0.5) ax.set_xticks(x) ax.set_xticklabels(sp_names, fontsize=9) ax.set_ylim(1, 5) ax.set_ylabel('Rating (1-5)') ax.legend() ax.set_title('Social Presence (Legacy) by Medium — Tutoring Phase\n(Only filled by participants who wore Meta Quest Pro; N varies by medium)', fontsize=11, fontweight='bold') fig.tight_layout() save(fig, 'social_presence_by_medium', 'Social presence by medium') # ========================================================================= # Q07: Cybersickness by Medium (VR only, but show all for comparison) # ========================================================================= print("\nG. Cybersickness") cyber = pd.DataFrame({ label: safe_numeric(post_tutor.iloc[:, col]) for label, col in zip(CYBERSICKNESS_LABELS, CYBERSICKNESS_COLS) }) # Recode: "Nein" → 0 (no symptom), "Ja" → 1 (has symptom)... 
wait, these might be Yes/No # Check the actual values — they seem to be "Ja"/"Nein" strings # Let me handle both cases for label in CYBERSICKNESS_LABELS: col_data = post_tutor.iloc[:, CYBERSICKNESS_COLS[CYBERSICKNESS_LABELS.index(label)]] if col_data.dtype == object: # String data: map Ja=1, Nein=0 cyber[label] = col_data.map({'Ja': 1, 'Nein': 0, 'ja': 1, 'nein': 0}) else: cyber[label] = safe_numeric(col_data) cyber['Medium'] = post_tutor['Medium_col'].values # Check if binary or scale is_binary = cyber[CYBERSICKNESS_LABELS[0]].dropna().isin([0, 1]).all() fig, ax = plt.subplots(figsize=(10, 5)) x = np.arange(len(CYBERSICKNESS_LABELS)) width = 0.25 if is_binary: # Show percentage reporting symptoms for i, m in enumerate(MEDIUMS): data = cyber[cyber['Medium'] == m] pcts = [(data[s] == 1).sum() / len(data) * 100 for s in CYBERSICKNESS_LABELS] ax.bar(x + (i-1)*width, pcts, width, label=m, color=MED_COLORS[m], edgecolor='gray', linewidth=0.5) ax.set_ylabel('% Reporting Symptom') ax.set_ylim(0, 100) else: for i, m in enumerate(MEDIUMS): data = cyber[cyber['Medium'] == m] means = [data[s].mean() for s in CYBERSICKNESS_LABELS] sems = [data[s].sem() for s in CYBERSICKNESS_LABELS] ax.bar(x + (i-1)*width, means, width, yerr=sems, capsize=2, label=m, color=MED_COLORS[m], edgecolor='gray', linewidth=0.5) ax.set_ylabel('Severity Rating') ax.set_xticks(x) ax.set_xticklabels(CYBERSICKNESS_LABELS, fontsize=9) ax.legend() ax.set_title('Cybersickness Symptoms by Medium — Tutoring Phase', fontsize=13, fontweight='bold') fig.tight_layout() save(fig, 'cybersickness_by_medium', 'Cybersickness by medium') # ========================================================================= # Q08: Stress / Readiness / Relaxation — Pre-Reading vs Pre-Tutoring # ========================================================================= print("\nH. 
Pre-Session States") pre_r_df = pd.DataFrame({ 'Stressed': safe_numeric(pre_read.iloc[:, STRESS_COL]), 'Ready': safe_numeric(pre_read.iloc[:, READY_COL]), 'Relaxed': safe_numeric(pre_read.iloc[:, RELAXED_COL]), 'Medium': pre_read['Medium_col'], 'Phase': 'Pre-Reading' }) pre_t_df = pd.DataFrame({ 'Stressed': safe_numeric(pre_tutor.iloc[:, STRESS_COL]), 'Ready': safe_numeric(pre_tutor.iloc[:, READY_COL]), 'Relaxed': safe_numeric(pre_tutor.iloc[:, RELAXED_COL]), 'Medium': pre_tutor['Medium_col'], 'Phase': 'Pre-Tutoring' }) pre_all = pd.concat([pre_r_df, pre_t_df], ignore_index=True) fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True) for ax, item in zip(axes, ['Stressed', 'Ready', 'Relaxed']): x = np.arange(3) width = 0.35 means_r = [pre_all[(pre_all['Phase']=='Pre-Reading') & (pre_all['Medium']==m)][item].mean() for m in MEDIUMS] means_t = [pre_all[(pre_all['Phase']=='Pre-Tutoring') & (pre_all['Medium']==m)][item].mean() for m in MEDIUMS] sems_r = [pre_all[(pre_all['Phase']=='Pre-Reading') & (pre_all['Medium']==m)][item].sem() for m in MEDIUMS] sems_t = [pre_all[(pre_all['Phase']=='Pre-Tutoring') & (pre_all['Medium']==m)][item].sem() for m in MEDIUMS] ax.bar(x - width/2, means_r, width, yerr=sems_r, capsize=3, label='Pre-Reading', color='#BBDEFB', edgecolor='#1565C0') ax.bar(x + width/2, means_t, width, yerr=sems_t, capsize=3, label='Pre-Tutoring', color='#C8E6C9', edgecolor='#2E7D32') ax.set_xticks(x) ax.set_xticklabels(MEDIUMS) ax.set_title(item, fontsize=12, fontweight='bold') ax.set_ylim(1, 7) if ax == axes[0]: ax.set_ylabel('Rating (1-7)') ax.legend(fontsize=9) fig.suptitle('Pre-Session State: Reading vs Tutoring by Medium', fontsize=14, fontweight='bold') fig.tight_layout() save(fig, 'pre_session_states', 'Pre-session stress/readiness/relaxation') # ========================================================================= # Q09: IOS (Inclusion of Other in Self) by Medium # ========================================================================= 
print("\nI. Additional Measures") ios_data = pd.DataFrame({ 'IOS': safe_numeric(post_tutor.iloc[:, IOS_COL]), 'Medium': post_tutor['Medium_col'] }) fig, axes = plt.subplots(1, 3, figsize=(15, 4)) # IOS ax = axes[0] for i, m in enumerate(MEDIUMS): vals = ios_data[ios_data['Medium'] == m]['IOS'].dropna() bp = ax.boxplot([vals], positions=[i], widths=0.5, patch_artist=True, boxprops=dict(facecolor=MED_COLORS[m], alpha=0.6), medianprops=dict(color='black', linewidth=2)) ax.scatter([i]*len(vals), vals, color=MED_COLORS[m], alpha=0.7, zorder=3, s=30) ax.text(i, vals.mean() + 0.3, f'M={vals.mean():.2f}', ha='center', fontsize=9, fontweight='bold') ax.set_xticks(range(3)) ax.set_xticklabels(MEDIUMS) ax.set_ylim(0.5, 7.5) ax.set_ylabel('Closeness (1-7)') ax.set_title('IOS: Closeness to Tutor', fontsize=11, fontweight='bold') # Self-use willingness ax = axes[1] self_use = pd.DataFrame({ 'Value': safe_numeric(post_tutor.iloc[:, EXTRA_COLS['self_use']]), 'Medium': post_tutor['Medium_col'] }) for i, m in enumerate(MEDIUMS): vals = self_use[self_use['Medium'] == m]['Value'].dropna() bp = ax.boxplot([vals], positions=[i], widths=0.5, patch_artist=True, boxprops=dict(facecolor=MED_COLORS[m], alpha=0.6), medianprops=dict(color='black', linewidth=2)) ax.scatter([i]*len(vals), vals, color=MED_COLORS[m], alpha=0.7, zorder=3, s=30) ax.text(i, vals.mean() + 0.3, f'M={vals.mean():.2f}', ha='center', fontsize=9, fontweight='bold') ax.set_xticks(range(3)) ax.set_xticklabels(MEDIUMS) ax.set_ylim(0.5, 7.5) ax.set_ylabel('Agreement (1-7)') ax.set_title('Would Use This Method', fontsize=11, fontweight='bold') # Felt helpful ax = axes[2] helpful = pd.DataFrame({ 'Value': safe_numeric(post_tutor.iloc[:, EXTRA_COLS['felt_helpful']]), 'Medium': post_tutor['Medium_col'] }) for i, m in enumerate(MEDIUMS): vals = helpful[helpful['Medium'] == m]['Value'].dropna() bp = ax.boxplot([vals], positions=[i], widths=0.5, patch_artist=True, boxprops=dict(facecolor=MED_COLORS[m], alpha=0.6), 
medianprops=dict(color='black', linewidth=2)) ax.scatter([i]*len(vals), vals, color=MED_COLORS[m], alpha=0.7, zorder=3, s=30) ax.text(i, vals.mean() + 0.3, f'M={vals.mean():.2f}', ha='center', fontsize=9, fontweight='bold') ax.set_xticks(range(3)) ax.set_xticklabels(MEDIUMS) ax.set_ylim(0.5, 7.5) ax.set_ylabel('Agreement (1-7)') ax.set_title('Felt Helpful for Review', fontsize=11, fontweight='bold') fig.suptitle('Additional Tutoring Measures by Medium', fontsize=14, fontweight='bold') fig.tight_layout() save(fig, 'additional_measures', 'IOS, self-use, helpfulness by medium') # ========================================================================= # Q10: Correlation Heatmap — Questionnaire Subscales vs Learning Gains # ========================================================================= print("\nJ. Correlations") # Build per-participant-topic tutoring gain pre_scores = test_scores[test_scores['Zeitpunkt'] == 'Pre-Tutoring'].set_index(['Participant', 'Topic']) post_scores = test_scores[test_scores['Zeitpunkt'] == 'Post-Tutoring'].set_index(['Participant', 'Topic']) common_idx = pre_scores.index.intersection(post_scores.index) gains = pd.DataFrame({ 'Participant': [idx[0] for idx in common_idx], 'Topic': [idx[1] for idx in common_idx], 'Score_Gain': post_scores.loc[common_idx, 'Score_Pct'].astype(float).values - pre_scores.loc[common_idx, 'Score_Pct'].astype(float).values, 'Medium': post_scores.loc[common_idx, 'Medium'].values, }) # Compute questionnaire scores per row of post_tutor and join with gains q_scores = pd.DataFrame({ 'Participant': post_tutor['Participant_col'].values, 'Medium': post_tutor['Medium_col'].values, 'IMI_Interest': compute_imi(post_tutor, IMI_INTEREST), 'IMI_Value': compute_imi(post_tutor, IMI_VALUE), 'IMI_Choice': compute_imi(post_tutor, IMI_CHOICE), 'SUS': compute_sus(post_tutor, SUS_COLS), 'NASA_TLX': compute_nasatlx(post_tutor, NASATLX_COLS_TUTORING)[1], 'Social_Presence': compute_social_presence(post_tutor)[1], 'IOS': 
    # Tail of the q_scores DataFrame construction begun above: the IOS item is
    # read directly from the tutoring post-questionnaire column and coerced to
    # numeric via safe_numeric.
        safe_numeric(post_tutor.iloc[:, IOS_COL]),
    })
    # Add Godspeed overall
    gs = compute_godspeed(post_tutor)
    q_scores['Godspeed'] = gs['Overall']
    # UEQ-S
    _, _, ueq_overall_t = compute_ueqs(post_tutor, UEQS_COLS_TUTORING)
    q_scores['UEQ_S'] = ueq_overall_t

    # Match on participant + medium (since topic info isn't in questionnaire directly,
    # we can aggregate by participant to get mean questionnaire scores)
    q_agg = q_scores.groupby('Participant').mean(numeric_only=True)
    gains_agg = gains.groupby('Participant')['Score_Gain'].mean()
    # Inner join keeps only participants present in both questionnaire and gains data.
    merged = q_agg.join(gains_agg, how='inner')

    # Correlation matrix (exclude Social Presence — VR-only, too many NaN)
    corr_cols = ['IMI_Interest', 'IMI_Value', 'IMI_Choice', 'SUS', 'UEQ_S',
                 'NASA_TLX', 'IOS', 'Godspeed', 'Score_Gain']
    corr_labels = ['IMI\nInterest', 'IMI\nValue', 'IMI\nChoice', 'SUS', 'UEQ-S',
                   'NASA-TLX', 'IOS', 'Godspeed', 'Tutoring\nScore Gain']
    corr_data = merged[corr_cols]
    corr_matrix = corr_data.corr()
    # Also compute p-values
    # NOTE(review): `n` is computed here but never referenced afterwards —
    # the pairwise loop below uses per-pair valid counts instead.
    n = len(corr_data.dropna())
    # p_matrix starts at 1.0 everywhere (non-significant default, incl. diagonal);
    # pairwise Pearson p-values are filled in symmetrically below.
    p_matrix = pd.DataFrame(np.ones((len(corr_cols), len(corr_cols))),
                            index=corr_cols, columns=corr_cols)
    for i_c in range(len(corr_cols)):
        for j_c in range(i_c + 1, len(corr_cols)):
            # Pairwise deletion: drop rows where either variable is NaN.
            valid = corr_data[[corr_cols[i_c], corr_cols[j_c]]].dropna()
            if len(valid) >= 3:  # pearsonr needs at least 3 observations
                r, p = stats.pearsonr(valid.iloc[:, 0], valid.iloc[:, 1])
                p_matrix.iloc[i_c, j_c] = p
                p_matrix.iloc[j_c, i_c] = p

    fig, ax = plt.subplots(figsize=(10, 8))
    n_vars = len(corr_cols)
    # Relabel the correlation matrix
    corr_plot = corr_matrix.copy()
    corr_plot.index = corr_labels
    corr_plot.columns = corr_labels
    # Build annotation with significance stars (** p<.01, * p<.05; none on diagonal)
    annot_strs = []
    for i_c in range(n_vars):
        row_strs = []
        for j_c in range(n_vars):
            r = corr_matrix.iloc[i_c, j_c]
            if np.isnan(r):
                row_strs.append('')
            else:
                p = p_matrix.iloc[i_c, j_c]
                star = ''
                if i_c != j_c:
                    if p < 0.01:
                        star = '**'
                    elif p < 0.05:
                        star = '*'
                row_strs.append(f'{r:.2f}{star}')
        annot_strs.append(row_strs)
    annot_arr = np.array(annot_strs)
    # Lower triangle mask
    mask = np.triu(np.ones((n_vars, n_vars), dtype=bool), k=1)
    sns.heatmap(corr_plot, mask=mask, annot=annot_arr, fmt='', cmap='RdBu_r',
                center=0, vmin=-1, vmax=1, linewidths=1, linecolor='white',
                ax=ax, annot_kws={'fontsize': 10, 'fontweight': 'bold'},
                cbar_kws={'shrink': 0.8})
    ax.set_title('Questionnaire Subscale Correlations & Learning Gain\n(* p<.05, ** p<.01)',
                 fontsize=13, fontweight='bold')
    fig.tight_layout()
    save(fig, 'correlation_heatmap', 'Subscale correlations with learning gain')

    # =========================================================================
    # Q11: Reading vs Tutoring Phase Comparison Dashboard
    # =========================================================================
    print("\nK. Phase Comparisons")
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # IMI Interest: Reading vs Tutoring (overall means)
    ax = axes[0, 0]
    for sub in IMI_SUBSCALES:
        label = sub['label'].replace('\n', ' ')
        # NOTE(review): compute_imi is evaluated twice per phase (once for mean,
        # once for sem) — a minor inefficiency; caching the Series would avoid it.
        r_mean = compute_imi(post_read, sub).mean()
        t_mean = compute_imi(post_tutor, sub).mean()
        r_sem = compute_imi(post_read, sub).sem()
        t_sem = compute_imi(post_tutor, sub).sem()
        # NOTE(review): `idx` is assigned but never used.
        idx = IMI_SUBSCALES.index(sub)
        ax.errorbar([0, 1], [r_mean, t_mean], yerr=[r_sem, t_sem],
                    marker='o', capsize=4, label=label, linewidth=2)
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Reading', 'Tutoring'])
    ax.set_ylabel('Mean Score (1-7)')
    ax.set_ylim(2, 6)
    ax.set_title('IMI Subscales', fontweight='bold')
    ax.legend(fontsize=8)

    # NASA-TLX Overall: Reading vs Tutoring by Medium
    ax = axes[0, 1]
    for m in MEDIUMS:
        r_vals = tlx_all[(tlx_all['Phase']=='Reading') & (tlx_all['Medium']==m)]['Overall']
        t_vals = tlx_all[(tlx_all['Phase']=='Tutoring') & (tlx_all['Medium']==m)]['Overall']
        ax.errorbar([0, 1], [r_vals.mean(), t_vals.mean()],
                    yerr=[r_vals.sem(), t_vals.sem()],
                    marker='o', capsize=4, label=m, color=MED_COLORS[m], linewidth=2)
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Reading', 'Tutoring'])
    ax.set_ylabel('Workload (1-7)')
    ax.set_title('NASA-TLX Overall', fontweight='bold')
    ax.legend(fontsize=9)

    # UEQ-S Overall: Reading vs Tutoring by Medium
    ax = axes[1, 0]
    for m in MEDIUMS:
        r_vals = ueq_all[(ueq_all['Phase']=='Reading') & (ueq_all['Medium']==m)]['Overall']
        t_vals = ueq_all[(ueq_all['Phase']=='Tutoring') & (ueq_all['Medium']==m)]['Overall']
        ax.errorbar([0, 1], [r_vals.mean(), t_vals.mean()],
                    yerr=[r_vals.sem(), t_vals.sem()],
                    marker='o', capsize=4, label=m, color=MED_COLORS[m], linewidth=2)
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Reading', 'Tutoring'])
    ax.set_ylabel('UEQ-S Score (-3 to +3)')
    ax.set_title('UEQ-S Overall', fontweight='bold')
    # Reference line at 0 = neutral UEQ-S score.
    ax.axhline(0, color='black', ls='-', lw=0.5)
    ax.legend(fontsize=9)

    # Stress: Pre-Reading vs Pre-Tutoring by Medium
    ax = axes[1, 1]
    for m in MEDIUMS:
        r_vals = pre_all[(pre_all['Phase']=='Pre-Reading') & (pre_all['Medium']==m)]['Stressed']
        t_vals = pre_all[(pre_all['Phase']=='Pre-Tutoring') & (pre_all['Medium']==m)]['Stressed']
        ax.errorbar([0, 1], [r_vals.mean(), t_vals.mean()],
                    yerr=[r_vals.sem(), t_vals.sem()],
                    marker='o', capsize=4, label=m, color=MED_COLORS[m], linewidth=2)
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Pre-Reading', 'Pre-Tutoring'])
    ax.set_ylabel('Stress Rating (1-7)')
    ax.set_title('Pre-Session Stress', fontweight='bold')
    ax.legend(fontsize=9)

    fig.suptitle('Reading vs Tutoring Phase Comparison', fontsize=15, fontweight='bold')
    fig.tight_layout()
    save(fig, 'phase_comparison_dashboard', 'Reading vs Tutoring phase comparison dashboard')

    # =========================================================================
    # Q12: VR-Specific Analysis (Social Presence + Cybersickness + Godspeed)
    # =========================================================================
    print("\nL. VR-Specific Analysis")
    # NOTE(review): `vr_data` is assigned but never used below — the VR subsets
    # are taken from sp_items / cyber / gs_df instead.
    vr_data = post_tutor[post_tutor['Medium_col'] == 'VR']
    fig, axes = plt.subplots(1, 3, figsize=(16, 5))

    # Social Presence: VR vs others
    ax = axes[0]
    for i, m in enumerate(MEDIUMS):
        data = sp_items[sp_items['Medium'] == m]
        means = data['Overall'].mean()
        sems = data['Overall'].sem()
        color = MED_COLORS[m]
        ax.bar(i, means, yerr=sems, capsize=4, color=color,
               edgecolor='gray', width=0.6, label=m)
        # Value label above the error bar.
        ax.text(i, means + sems + 0.15, f'{means:.2f}', ha='center',
                fontsize=10, fontweight='bold')
    ax.set_xticks(range(3))
    ax.set_xticklabels(MEDIUMS)
    ax.set_ylabel('Mean Social Presence (1-5)')
    ax.set_ylim(1, 5)
    ax.set_title('Social Presence', fontweight='bold')

    # Cybersickness: VR-specific item detail
    ax = axes[1]
    # `is_binary` is set earlier in the script (outside this view); presumably it
    # flags whether cybersickness items were recorded as yes/no vs a severity
    # scale — TODO confirm against the loading code.
    if is_binary:
        # Binary items: plot % of VR participants reporting each symptom,
        # colored by prevalence (red >30%, orange >10%, green otherwise).
        vr_cyber = cyber[cyber['Medium'] == 'VR']
        pcts = [(vr_cyber[s] == 1).sum() / len(vr_cyber) * 100 for s in CYBERSICKNESS_LABELS]
        colors = ['#EF5350' if p > 30 else '#FFA726' if p > 10 else '#66BB6A' for p in pcts]
        bars = ax.bar(range(len(CYBERSICKNESS_LABELS)), pcts, color=colors, edgecolor='gray')
        for bar, p in zip(bars, pcts):
            ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1.5,
                    f'{p:.0f}%', ha='center', fontsize=10, fontweight='bold')
        ax.set_ylabel('% VR Participants Reporting')
        ax.set_ylim(0, 100)
    else:
        # Scaled items: plot mean severity with SEM error bars.
        vr_cyber = cyber[cyber['Medium'] == 'VR']
        means = [vr_cyber[s].mean() for s in CYBERSICKNESS_LABELS]
        sems = [vr_cyber[s].sem() for s in CYBERSICKNESS_LABELS]
        ax.bar(range(len(CYBERSICKNESS_LABELS)), means, yerr=sems, capsize=3,
               color='#66BB6A', edgecolor='gray')
        ax.set_ylabel('Severity')
    ax.set_xticks(range(len(CYBERSICKNESS_LABELS)))
    ax.set_xticklabels(CYBERSICKNESS_LABELS, fontsize=8)
    ax.set_title('VR Cybersickness', fontweight='bold')

    # Godspeed: VR vs others
    ax = axes[2]
    gs_overall_by_m = gs_df.groupby('Medium')['Overall'].agg(['mean', 'sem'])
    for i, m in enumerate(MEDIUMS):
        mean_v = gs_overall_by_m.loc[m, 'mean']
        sem_v = gs_overall_by_m.loc[m, 'sem']
        ax.bar(i, mean_v, yerr=sem_v, capsize=4, color=MED_COLORS[m],
               edgecolor='gray', width=0.6)
        ax.text(i, mean_v + sem_v + 0.05, f'{mean_v:.2f}', ha='center',
                fontsize=10, fontweight='bold')
    ax.set_xticks(range(3))
    ax.set_xticklabels(MEDIUMS)
    ax.set_ylabel('Mean Godspeed (1-5)')
    ax.set_ylim(1, 5)
    ax.set_title('Godspeed: Tutor Impression', fontweight='bold')

    fig.suptitle('VR-Specific Comparisons', fontsize=14, fontweight='bold')
    fig.tight_layout()
    save(fig, 'vr_specific', 'VR-specific: social presence, cybersickness, Godspeed')

    # =========================================================================
    # Export statistics to CSV
    # =========================================================================
    def stats_by_medium(data_df: pd.DataFrame, value_col: str, mediums=MEDIUMS) -> pd.DataFrame:
        """Return a descriptive-stats table (one row per medium) for `value_col`.

        Columns: Medium, N, Mean, SD, SEM, Median, Min, Max. NaNs are dropped
        before computing the statistics.
        """
        rows = []
        for m in mediums:
            vals = data_df[data_df['Medium'] == m][value_col].dropna()
            rows.append({
                'Medium': m, 'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(),
                'SEM': vals.sem(), 'Median': vals.median(), 'Min': vals.min(),
                'Max': vals.max(),
            })
        return pd.DataFrame(rows)

    # SUS
    stats_by_medium(sus_df, 'SUS').to_csv(
        STATS_DIR / 'questionnaire_sus_by_medium.csv', index=False, float_format='%.3f')

    # IMI subscales (tutoring phase)
    imi_rows = []
    for sub in IMI_SUBSCALES:
        label = sub['label'].replace('\n', ' ')
        for m in MEDIUMS:
            vals = imi_tutor[imi_tutor['Medium'] == m][sub['label']].dropna()
            imi_rows.append({'Subscale': label, 'Phase': 'Tutoring', 'Medium': m,
                             'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(),
                             'SEM': vals.sem()})
        for m in MEDIUMS:
            vals = imi_read[imi_read['Medium'] == m][sub['label']].dropna()
            imi_rows.append({'Subscale': label, 'Phase': 'Reading', 'Medium': m,
                             'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(),
                             'SEM': vals.sem()})
    pd.DataFrame(imi_rows).to_csv(
        STATS_DIR / 'questionnaire_imi_by_medium.csv', index=False, float_format='%.3f')

    # UEQ-S
    ueq_rows = []
    for dim in ['Pragmatic', 'Hedonic', 'Overall']:
        for phase in ['Reading', 'Tutoring']:
            for m in MEDIUMS:
                vals = ueq_all[(ueq_all['Phase'] == phase) & (ueq_all['Medium'] == m)][dim].dropna()
                ueq_rows.append({'Dimension': dim, 'Phase': phase, 'Medium': m,
                                 'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(),
                                 'SEM': vals.sem()})
    pd.DataFrame(ueq_rows).to_csv(
        STATS_DIR / 'questionnaire_ueqs_by_medium.csv', index=False, float_format='%.3f')

    # NASA-TLX
    tlx_rows = []
    sub_names_all = NASATLX_LABELS + ['Overall']
    for phase in ['Reading', 'Tutoring']:
        for m in MEDIUMS:
            data = tlx_all[(tlx_all['Phase'] == phase) & (tlx_all['Medium'] == m)]
            for s in sub_names_all:
                vals = data[s].dropna()
                tlx_rows.append({'Subscale': s.replace('\n', ' '), 'Phase': phase,
                                 'Medium': m, 'N': len(vals), 'Mean': vals.mean(),
                                 'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(tlx_rows).to_csv(
        STATS_DIR / 'questionnaire_nasatlx_by_medium.csv', index=False, float_format='%.3f')

    # Godspeed
    gs_rows = []
    for subscale in list(GODSPEED_SUBSCALES.keys()) + ['Overall']:
        for m in MEDIUMS:
            vals = gs_df[gs_df['Medium'] == m][subscale].dropna()
            gs_rows.append({'Subscale': subscale.replace('\n', ' '), 'Medium': m,
                            'N': len(vals), 'Mean': vals.mean(), 'SD': vals.std(),
                            'SEM': vals.sem()})
    pd.DataFrame(gs_rows).to_csv(
        STATS_DIR / 'questionnaire_godspeed_by_medium.csv', index=False, float_format='%.3f')

    # Social Presence
    sp_rows = []
    for item_name in SOCIAL_PRESENCE_LABELS + ['Overall']:
        for m in MEDIUMS:
            vals = sp_items[sp_items['Medium'] == m][item_name].dropna()
            sp_rows.append({'Item': item_name, 'Medium': m, 'N': len(vals),
                            'Mean': vals.mean(), 'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(sp_rows).to_csv(
        STATS_DIR / 'questionnaire_social_presence_by_medium.csv', index=False, float_format='%.3f')

    # IOS + Extra measures
    ios_rows = []
    for col_name, col_key in [('IOS', 'IOS'), ('Self_Use', 'self_use'),
                              ('Felt_Helpful', 'felt_helpful')]:
        if col_name == 'IOS':
            # IOS already has its own per-participant frame with a Medium column.
            source_df = ios_data.rename(columns={'IOS': col_name})
        else:
            # Extra single-item measures are pulled straight out of the tutoring
            # post-questionnaire by column index (EXTRA_COLS lookup).
            source_df = pd.DataFrame({
                col_name: pd.to_numeric(post_tutor.iloc[:, EXTRA_COLS[col_key]], errors='coerce'),
                'Medium': post_tutor['Medium_col']
            })
        for m in MEDIUMS:
            vals = source_df[source_df['Medium'] == m][col_name].dropna()
            ios_rows.append({'Measure': col_name, 'Medium': m, 'N': len(vals),
                             'Mean': vals.mean(), 'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(ios_rows).to_csv(
        STATS_DIR / 'questionnaire_additional_by_medium.csv', index=False, float_format='%.3f')

    # Correlation matrix with p-values (full square matrix, incl. diagonal,
    # reusing corr_matrix/p_matrix computed for the heatmap above)
    corr_export_rows = []
    for i_c in range(len(corr_cols)):
        for j_c in range(len(corr_cols)):
            corr_export_rows.append({
                'Var1': corr_cols[i_c], 'Var2': corr_cols[j_c],
                'r': corr_matrix.iloc[i_c, j_c], 'p': p_matrix.iloc[i_c, j_c],
                'sig': ('**' if p_matrix.iloc[i_c, j_c] < 0.01
                        else '*' if p_matrix.iloc[i_c, j_c] < 0.05 else ''),
            })
    pd.DataFrame(corr_export_rows).to_csv(
        STATS_DIR / 'questionnaire_correlations.csv', index=False, float_format='%.4f')

    # Pre-session states
    pre_state_rows = []
    for item in ['Stressed', 'Ready', 'Relaxed']:
        for phase in ['Pre-Reading', 'Pre-Tutoring']:
            for m in MEDIUMS:
                vals = pre_all[(pre_all['Phase'] == phase) & (pre_all['Medium'] == m)][item].dropna()
                pre_state_rows.append({'Item': item, 'Phase': phase, 'Medium': m,
                                       'N': len(vals), 'Mean': vals.mean(),
                                       'SD': vals.std(), 'SEM': vals.sem()})
    pd.DataFrame(pre_state_rows).to_csv(
        STATS_DIR / 'questionnaire_pre_session_states.csv', index=False, float_format='%.3f')

    print(f"\n Stats exported to: {STATS_DIR}")

    # =========================================================================
    # Print summary statistics
    # =========================================================================
    print("\n" + "=" * 70)
    print("QUESTIONNAIRE SUMMARY STATISTICS")
    print("=" * 70)

    # SUS
    print("\nSUS Scores by Medium:")
    for m in MEDIUMS:
        vals = sus_df[sus_df['Medium'] == m]['SUS'].dropna()
        print(f" {m}: M={vals.mean():.1f}, SD={vals.std():.1f}, Median={vals.median():.1f}")

    # IMI
    print("\nIMI by Medium (Tutoring):")
    for sub in IMI_SUBSCALES:
        label = sub['label'].replace('\n', ' ')
        for m in MEDIUMS:
            vals = imi_tutor[imi_tutor['Medium'] == m][sub['label']].dropna()
            print(f" {label}/{m}: M={vals.mean():.2f}, SD={vals.std():.2f}")

    # UEQ-S
    print("\nUEQ-S Overall by Medium (Tutoring):")
    for m in MEDIUMS:
        vals = ueq_tutor[ueq_tutor['Medium'] == m]['Overall'].dropna()
        print(f" {m}: M={vals.mean():.2f}, SD={vals.std():.2f}")

    # NASA-TLX
    print("\nNASA-TLX Overall by Medium (Tutoring):")
    for m in MEDIUMS:
        vals = tlx_all[(tlx_all['Phase'] == 'Tutoring') & (tlx_all['Medium'] == m)]['Overall'].dropna()
        print(f" {m}: M={vals.mean():.2f}, SD={vals.std():.2f}")

    # Social Presence
    print("\nSocial Presence Overall by Medium:")
    for m in MEDIUMS:
        vals = sp_items[sp_items['Medium'] == m]['Overall'].dropna()
        print(f" {m}: M={vals.mean():.2f}, SD={vals.std():.2f}")

    # Godspeed
    print("\nGodspeed Overall by Medium:")
    for m in MEDIUMS:
        vals = gs_df[gs_df['Medium'] == m]['Overall'].dropna()
        print(f" {m}: M={vals.mean():.2f}, SD={vals.std():.2f}")

    # IOS
    print("\nIOS by Medium:")
    for m in MEDIUMS:
        vals = ios_data[ios_data['Medium'] == m]['IOS'].dropna()
        print(f" {m}: M={vals.mean():.2f}, SD={vals.std():.2f}")

    # `plot_num` is maintained by save() elsewhere in the file — presumably a
    # running plot counter; verify against the save() definition.
    print(f"\n{plot_num} plots saved to: {PLOT_DIR}")


if __name__ == "__main__":
    main()