"""
Friedman Test on medium preference rankings.

Data format: each row = one participant; Rank1/Rank2/Rank3 hold the
condition name (VR, Chat, Video) assigned to that rank position.

We reshape so every participant has a numeric rank (1–3) for every
condition, then run a Friedman test and save a summary plot.
"""

from itertools import combinations
from pathlib import Path

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

# ── 1. Load & reshape ─────────────────────────────────────────────────────────
# Input: one row per participant; Rank1/Rank2/Rank3 hold the name of the
# condition that the participant placed at that rank position.
df_raw = pd.read_csv("Data/medium_order.csv")

# Build a long table: (Participant, Condition, Rank).
# melt() stacks the three rank columns in one vectorised step; the numeric
# rank is then recovered from the name of the column each value came from.
rank_of_column = {"Rank1": 1, "Rank2": 2, "Rank3": 3}
df_long = df_raw.melt(
    id_vars="Participant",
    value_vars=list(rank_of_column),
    var_name="RankColumn",
    value_name="Condition",
)
df_long["Rank"] = df_long["RankColumn"].map(rank_of_column)
df_long = df_long[["Participant", "Condition", "Rank"]]

# Pivot to wide: rows = participants, columns = conditions
df_wide = df_long.pivot(index="Participant", columns="Condition", values="Rank")
df_wide = df_wide[["VR", "Chat", "Video"]]  # fix column order

# pivot() leaves NaN wherever a participant has no rank for a condition
# (e.g. a misspelt condition name in the CSV). Stop here with a clear message
# instead of letting NaN flow silently into the tests below.
incomplete = df_wide.index[df_wide.isna().any(axis=1)].tolist()
if incomplete:
    raise ValueError(
        "Incomplete ranking (every participant must rank VR, Chat and Video "
        f"exactly once) for participant(s): {incomplete}"
    )

print("=" * 55)
print("Reshaped ranking matrix (numeric ranks per condition)")
print("=" * 55)
print(df_wide.to_string())
print()

# ── 2. Descriptive statistics ─────────────────────────────────────────────────
# One row per condition, holding the mean, median and standard deviation of
# the ranks that condition received.
desc = (
    df_wide.agg(["mean", "median", "std"])
    .T
    .rename(columns={"mean": "Mean rank", "median": "Median rank", "std": "SD"})
    .rename_axis(index="Condition")
)

banner = "=" * 55
print(banner, "Descriptive statistics (lower rank = more preferred)", banner, sep="\n")
print(desc.round(3).to_string())
print()

# ── 3. Friedman test ──────────────────────────────────────────────────────────
# scipy expects one array per group (condition), each of length n_participants
statistic, p_value = stats.friedmanchisquare(
    *(df_wide[cond] for cond in df_wide.columns)
)

# n and k come from the matrix that was actually tested, so the degrees of
# freedom and the effect size cannot drift out of sync with the data.
n, k = df_wide.shape  # participants, conditions
df_friedman = k - 1   # degrees of freedom
# Kendall's W (effect size)
W = statistic / (n * (k - 1))

# A very small p-value would round to "0.0000"; report it as a bound instead.
p_text = f"{p_value:.4f}" if p_value >= 0.0001 else "< 0.0001"

print("=" * 55)
print("Friedman test")
print("=" * 55)
print(f" N (participants) : {n}")
print(f" k (conditions) : {k}")
print(f" chi2({df_friedman}) : {statistic:.4f}")
print(f" p-value : {p_text}")
print(f" Kendall's W : {W:.4f}")
print()
if p_value < 0.05:
    print(" --> Significant difference in rankings (p < .05).")
else:
    print(" --> No significant difference in rankings (p >= .05).")
print()

# ── 4. Post-hoc: Wilcoxon signed-rank tests (Bonferroni-corrected) ────────────
# Every unordered pair of conditions is compared once; the significance
# threshold is divided by the number of comparisons (Bonferroni).
pairs = list(combinations(["VR", "Chat", "Video"], 2))
n_pairs = len(pairs)
alpha_corr = 0.05 / n_pairs  # Bonferroni threshold

print("=" * 55)
print("Post-hoc: Wilcoxon signed-rank tests")
print(f"(Bonferroni-corrected alpha = {alpha_corr:.4f})")
print("=" * 55)

# Column label for the adjusted p-value; derived from n_pairs so it always
# states the multiplier that was really applied.
adjusted_label = f"p (x{n_pairs})"

posthoc_rows = []
for a, b in pairs:
    stat_w, p_w = stats.wilcoxon(df_wide[a], df_wide[b])
    sig = "*" if p_w < alpha_corr else ""
    posthoc_rows.append({
        "Pair": f"{a} vs {b}",
        "W statistic": round(stat_w, 4),
        "p (raw)": round(p_w, 4),
        # Bonferroni-adjusted p-value, capped at 1.0
        adjusted_label: round(min(p_w * n_pairs, 1.0), 4),
        "Sig.": sig,
    })

df_posthoc = pd.DataFrame(posthoc_rows).set_index("Pair")
print(df_posthoc.to_string())
print(f"\n * Significant after Bonferroni correction (alpha = {alpha_corr:.4f})")

# ── 5. Plot ───────────────────────────────────────────────────────────────────
# Display order of the conditions on the x-axis of both panels.
CONDITIONS = ["VR", "Chat", "Video"]
# Bar / marker colour for each condition.
MED_COLORS = {
    "Chat": "#42A5F5",
    "Video": "#FFA726",
    "VR": "#66BB6A",
}
# Segment colour for each rank position: 1st, 2nd, 3rd.
RANK_COLORS = ["#4CAF50", "#FFC107", "#F44336"]

# Global figure styling; set before the figure is created.
sns.set_theme(style="whitegrid", font_scale=1.05)
plt.rcParams.update({
    "figure.dpi": 150,
    "savefig.bbox": "tight",
})

# Two side-by-side panels: axes[0] (left) and axes[1] (right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(13, 5))

# ── Left panel: mean rank + individual jitter ─────────────────────────────────
ax = axes[0]
x_pos = np.arange(len(CONDITIONS))

# Horizontal jitter for the individual observations. The generator is seeded,
# so the offsets are the same for every condition; draw them once instead of
# rebuilding the generator on each loop iteration.
jitter = np.random.default_rng(42).uniform(-0.12, 0.12, size=len(df_wide))
for i, cond in enumerate(CONDITIONS):
    ax.scatter(i + jitter, df_wide[cond].values,
               color=MED_COLORS[cond], alpha=0.55, s=40, zorder=3)

means = [df_wide[c].mean() for c in CONDITIONS]
sems = [df_wide[c].sem() for c in CONDITIONS]

# Bars show the mean rank; error bars show the standard error of the mean.
ax.bar(x_pos, means, yerr=sems, capsize=5,
       color=[MED_COLORS[c] for c in CONDITIONS],
       edgecolor="gray", linewidth=0.6, alpha=0.75, width=0.5, zorder=2)

# Numeric mean printed just above each error bar.
for i, (m, s) in enumerate(zip(means, sems)):
    ax.text(i, m + s + 0.08, f"M={m:.2f}", ha="center", fontsize=10, fontweight="bold")

ax.set_xticks(x_pos)
ax.set_xticklabels(CONDITIONS, fontsize=12)
ax.set_ylim(0.5, 3.8)
ax.set_yticks([1, 2, 3])
ax.set_yticklabels(["1st\n(most preferred)", "2nd", "3rd\n(least preferred)"], fontsize=9)
ax.set_ylabel("Rank (lower = more preferred)", fontsize=11)
ax.set_title("Mean Rank per Condition\n(individual observations + SEM)", fontsize=11, fontweight="bold")

# A p-value below .001 would otherwise be rendered as "p = 0.000".
p_text = "p < .001" if p_value < 0.001 else f"p = {p_value:.3f}"
stat_label = (
    f"Friedman: chi2({df_friedman}) = {statistic:.2f}, "
    f"{p_text}, W = {W:.2f}"
)
ax.text(0.5, 0.03, stat_label, transform=ax.transAxes,
        ha="center", va="bottom", fontsize=8.5, color="dimgray",
        bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="lightgray", alpha=0.8))

# ── Right panel: stacked rank-distribution bars ───────────────────────────────
ax = axes[1]
n_participants = len(df_wide)
rank_labels = ["1st choice", "2nd choice", "3rd choice"]

# For each condition: percentage of participants who gave it rank 1, 2 and 3.
rank_counts = {}
for cond in CONDITIONS:
    rank_counts[cond] = [
        (df_wide[cond] == rank).sum() / n_participants * 100
        for rank in (1, 2, 3)
    ]

# Stack one layer per rank position; `bottoms` tracks the running height of
# each condition's bar.
bottoms = np.zeros(len(CONDITIONS))
for level, (label_text, layer_color) in enumerate(zip(rank_labels, RANK_COLORS)):
    segment = [rank_counts[cond][level] for cond in CONDITIONS]
    ax.bar(x_pos, segment, bottom=bottoms,
           color=layer_color, edgecolor="white", linewidth=0.8,
           width=0.55, label=label_text)
    for column, (height, base) in enumerate(zip(segment, bottoms)):
        if height < 8:
            continue  # segment too thin to hold a readable label
        ax.text(column, base + height / 2, f"{height:.0f}%",
                ha="center", va="center", fontsize=10,
                fontweight="bold", color="white")
    bottoms = bottoms + np.array(segment)

ax.set_xticks(x_pos)
ax.set_xticklabels(CONDITIONS, fontsize=12)
ax.set_ylim(0, 105)
ax.set_ylabel("Percentage of participants (%)", fontsize=11)
ax.set_title("Rank Distribution per Condition\n(% participants assigning each rank)", fontsize=11, fontweight="bold")

# Legend built from explicit patches, one per rank colour.
legend_handles = [
    mpatches.Patch(color=patch_color, label=patch_label)
    for patch_color, patch_label in zip(RANK_COLORS, rank_labels)
]
ax.legend(loc="upper right", fontsize=9, handles=legend_handles)

# Report the sample size that was actually plotted rather than a fixed number.
fig.suptitle(
    f"Medium Preference Rankings (N={len(df_wide)}, within-subjects)",
    fontsize=13, fontweight="bold",
)
fig.tight_layout()

out_path = Path("Data/plots_questionnaires/Q14_medium_ranking.png")
# savefig() does not create missing directories, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path)
plt.close(fig)
print(f"\nPlot saved: {out_path}")