Files
Virtual-Tutor-Eval/analysis_medium_ranking.py
2026-02-22 18:24:57 +01:00

198 lines
7.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Friedman Test on medium preference rankings.
Data format: each row = one participant; Rank1/Rank2/Rank3 hold the
condition name (VR, Chat, Video) assigned to that rank position.
We reshape so every participant has a numeric rank (1-3) for every
condition, then run a Friedman test and save a summary plot.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from pathlib import Path
from scipy import stats
# ── 1. Load & reshape ─────────────────────────────────────────────────────────
# Each CSV row is one participant; Rank1..Rank3 hold the condition name that
# the participant placed at that rank position.
df_raw = pd.read_csv("Data/medium_order.csv")

# Long table with one (Participant, Condition, Rank) record per participant
# and rank position; the numeric rank is the 1-based position of the column.
records = [
    {
        "Participant": entry["Participant"],
        "Condition": entry[rank_col],
        "Rank": position,
    }
    for _, entry in df_raw.iterrows()
    for position, rank_col in enumerate(["Rank1", "Rank2", "Rank3"], start=1)
]
df_long = pd.DataFrame(records)

# Wide table: rows = participants, columns = conditions (in a fixed order),
# cell = the numeric rank that participant gave to that condition.
df_wide = (
    df_long
    .pivot(index="Participant", columns="Condition", values="Rank")
    .loc[:, ["VR", "Chat", "Video"]]
)

print(
    "=" * 55,
    "Reshaped ranking matrix (numeric ranks per condition)",
    "=" * 55,
    sep="\n",
)
print(df_wide.to_string(), end="\n\n")
# ── 2. Descriptive statistics ─────────────────────────────────────────────────
# Mean, median and standard deviation of the ranks, one row per condition.
summary_labels = {"mean": "Mean rank", "median": "Median rank", "std": "SD"}
desc = (
    df_wide
    .agg(list(summary_labels))
    .T
    .rename(columns=summary_labels)
    .rename_axis(index="Condition")
)

print(
    "=" * 55,
    "Descriptive statistics (lower rank = more preferred)",
    "=" * 55,
    sep="\n",
)
print(desc.round(3).to_string(), end="\n\n")
# ── 3. Friedman test ──────────────────────────────────────────────────────────
# scipy takes one sample per condition, each holding one rank per participant.
statistic, p_value = stats.friedmanchisquare(
    *(df_wide[cond] for cond in ("VR", "Chat", "Video"))
)

k = 3                # number of conditions
n = len(df_wide)     # number of participants
df_friedman = k - 1  # degrees of freedom

# Kendall's W (effect size), derived from the Friedman chi-square statistic.
W = statistic / (n * df_friedman)

report_lines = [
    "=" * 55,
    "Friedman test",
    "=" * 55,
    f" N (participants) : {n}",
    f" k (conditions) : {k}",
    f" chi2({df_friedman}) : {statistic:.4f}",
    f" p-value : {p_value:.4f}",
    f" Kendall's W : {W:.4f}",
    "",  # trailing blank line after the table
]
print("\n".join(report_lines))

verdict = (
    " --> Significant difference in rankings (p < .05)."
    if p_value < 0.05
    else " --> No significant difference in rankings (p >= .05)."
)
print(verdict, end="\n\n")
# ── 4. Post-hoc: Wilcoxon signed-rank tests (Bonferroni-corrected) ────────────
from itertools import combinations

# Every unordered pair of conditions gets its own paired test.
pairs = list(combinations(["VR", "Chat", "Video"], 2))
n_pairs = len(pairs)
alpha_corr = 0.05 / n_pairs  # Bonferroni threshold

print(
    "=" * 55,
    "Post-hoc: Wilcoxon signed-rank tests",
    f"(Bonferroni-corrected alpha = {alpha_corr:.4f})",
    "=" * 55,
    sep="\n",
)

posthoc_rows = []
for first, second in pairs:
    w_stat, raw_p = stats.wilcoxon(df_wide[first], df_wide[second])
    # Bonferroni-adjusted p-value, capped at 1.
    adjusted_p = min(raw_p * n_pairs, 1.0)
    posthoc_rows.append({
        "Pair": f"{first} vs {second}",
        "W statistic": round(w_stat, 4),
        "p (raw)": round(raw_p, 4),
        "p (x3)": round(adjusted_p, 4),
        # Star marks pairs whose raw p is below the corrected threshold.
        "Sig.": "*" if raw_p < alpha_corr else "",
    })

df_posthoc = pd.DataFrame(posthoc_rows).set_index("Pair")
print(df_posthoc.to_string())
print(f"\n * Significant after Bonferroni correction (alpha = {alpha_corr:.4f})")
# ── 5. Plot ───────────────────────────────────────────────────────────────────
# Two-panel summary figure:
#   left  – mean rank per condition (bars + SEM) with individual observations
#   right – stacked bars showing what share of participants gave each rank
CONDITIONS = ["VR", "Chat", "Video"]
MED_COLORS = {"Chat": "#42A5F5", "Video": "#FFA726", "VR": "#66BB6A"}
RANK_COLORS = ["#4CAF50", "#FFC107", "#F44336"]  # 1st, 2nd, 3rd
RANK_LABELS = ["1st choice", "2nd choice", "3rd choice"]

sns.set_theme(style="whitegrid", font_scale=1.05)
plt.rcParams["figure.dpi"] = 150
plt.rcParams["savefig.bbox"] = "tight"

n_participants = len(df_wide)
x_pos = np.arange(len(CONDITIONS))
fig, axes = plt.subplots(1, 2, figsize=(13, 5))

# ── Left panel: mean rank + individual jitter ─────────────────────────────────
ax = axes[0]

# Horizontal jitter for the individual points. It is drawn once from a seeded
# generator and reused for every condition, so the figure is reproducible and
# the offsets are the same in each column.
jitter = np.random.default_rng(42).uniform(-0.12, 0.12, size=n_participants)
for i, cond in enumerate(CONDITIONS):
    vals = df_wide[cond].values
    ax.scatter(np.full(len(vals), i) + jitter, vals,
               color=MED_COLORS[cond], alpha=0.55, s=40, zorder=3)

means = [df_wide[c].mean() for c in CONDITIONS]
sems = [df_wide[c].sem() for c in CONDITIONS]
ax.bar(x_pos, means, yerr=sems, capsize=5,
       color=[MED_COLORS[c] for c in CONDITIONS],
       edgecolor="gray", linewidth=0.6, alpha=0.75, width=0.5, zorder=2)

# Label each bar with its mean, placed just above the error bar.
for i, (m, s) in enumerate(zip(means, sems)):
    ax.text(i, m + s + 0.08, f"M={m:.2f}", ha="center", fontsize=10, fontweight="bold")

ax.set_xticks(x_pos)
ax.set_xticklabels(CONDITIONS, fontsize=12)
ax.set_ylim(0.5, 3.8)
ax.set_yticks([1, 2, 3])
ax.set_yticklabels(["1st\n(most preferred)", "2nd", "3rd\n(least preferred)"], fontsize=9)
ax.set_ylabel("Rank (lower = more preferred)", fontsize=11)
ax.set_title("Mean Rank per Condition\n(individual observations + SEM)", fontsize=11, fontweight="bold")

# Friedman result as a small annotation box at the bottom of the panel.
stat_label = (
    f"Friedman: chi2({df_friedman}) = {statistic:.2f}, "
    f"p = {p_value:.3f}, W = {W:.2f}"
)
ax.text(0.5, 0.03, stat_label, transform=ax.transAxes,
        ha="center", va="bottom", fontsize=8.5, color="dimgray",
        bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="lightgray", alpha=0.8))

# ── Right panel: stacked rank-distribution bars ───────────────────────────────
ax = axes[1]

# Percentage of participants who assigned rank 1, 2 and 3 to each condition.
rank_counts = {
    cond: [(df_wide[cond] == r).sum() / n_participants * 100 for r in [1, 2, 3]]
    for cond in CONDITIONS
}

# Stack one bar segment per rank; `bottoms` tracks the running segment base.
bottoms = np.zeros(len(CONDITIONS))
for rank_idx, (rank_label, color) in enumerate(zip(RANK_LABELS, RANK_COLORS)):
    heights = [rank_counts[c][rank_idx] for c in CONDITIONS]
    ax.bar(x_pos, heights, bottom=bottoms,
           color=color, edgecolor="white", linewidth=0.8,
           width=0.55, label=rank_label)
    for xi, (h, b) in enumerate(zip(heights, bottoms)):
        # Only label segments tall enough to hold the text.
        if h >= 8:
            ax.text(xi, b + h / 2, f"{h:.0f}%",
                    ha="center", va="center", fontsize=10,
                    fontweight="bold", color="white")
    bottoms += np.array(heights)

ax.set_xticks(x_pos)
ax.set_xticklabels(CONDITIONS, fontsize=12)
ax.set_ylim(0, 105)
ax.set_ylabel("Percentage of participants (%)", fontsize=11)
ax.set_title("Rank Distribution per Condition\n(% participants assigning each rank)", fontsize=11, fontweight="bold")
ax.legend(loc="upper right", fontsize=9,
          handles=[mpatches.Patch(color=c, label=l)
                   for c, l in zip(RANK_COLORS, RANK_LABELS)])

# Report the actual sample size instead of a hard-coded number so the title
# stays correct when the input data changes.
fig.suptitle(f"Medium Preference Rankings (N={n_participants}, within-subjects)",
             fontsize=13, fontweight="bold")
fig.tight_layout()

out_path = Path("Data/plots_questionnaires/Q14_medium_ranking.png")
# Create the output directory first; saving into a missing directory fails.
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path)
plt.close(fig)
print(f"\nPlot saved: {out_path}")