#!/usr/bin/env python3
"""
Combined script that merges aggregate_segments_analysis.py and hr_convergence_analysis.py
functionality to compute HR alignment across recordings and mark the first convergence point.
"""

import math
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def ensure_dir(path: Path) -> None:
    path.mkdir(parents=True, exist_ok=True)


def read_signal_csv(csv_path: Path, value_column: str) -> pd.DataFrame:
    if not csv_path.exists():
        return pd.DataFrame(columns=[value_column])
    df = pd.read_csv(csv_path)
    if 'timestamp' not in df.columns or value_column not in df.columns:
        return pd.DataFrame(columns=[value_column])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', errors='coerce')
    df = df.dropna(subset=['timestamp']).set_index('timestamp').sort_index()
    df[value_column] = pd.to_numeric(df[value_column], errors='coerce')
    df = df.dropna(subset=[value_column])
    return df


def read_marks(csv_path: Path) -> pd.Series:
    if not csv_path.exists():
        return pd.Series([], dtype='datetime64[ns]')
    df = pd.read_csv(csv_path)
    if 'timestamp' not in df.columns:
        return pd.Series([], dtype='datetime64[ns]')
    ts = pd.to_datetime(df['timestamp'], unit='ms', errors='coerce').dropna().sort_values()
    return ts


def find_hr_convergence(df: pd.DataFrame) -> dict:
    """
    Find when the HR mean reaches the overall average HR.
    
    Args:
        df: DataFrame with columns ['hr_mean', 'aligned_seconds', 'n']
    
    Returns:
        dict: Analysis results including convergence time and statistics
    """
    # Remove rows where hr_mean is NaN (if any)
    df_clean = df.dropna(subset=['hr_mean'])
    
    if df_clean.empty:
        return None
    
    # Calculate overall average HR across all time points
    overall_avg_hr = df_clean['hr_mean'].mean()
    
    # Calculate weighted average (considering sample sizes at each time point)
    if 'n' in df_clean.columns:
        weighted_avg_hr = np.average(df_clean['hr_mean'], weights=df_clean['n'])
        target_hr = weighted_avg_hr
    else:
        target_hr = overall_avg_hr
    
    # Find when HR mean first reaches the overall average
    hr_values = df_clean['hr_mean'].values
    time_values = df_clean['aligned_seconds'].values
    
    # Find crossings of the target HR
    crossings = []
    
    for i in range(1, len(hr_values)):
        prev_hr = hr_values[i-1]
        curr_hr = hr_values[i]
        
        # Check if we crossed the target (either from above or below)
        if (prev_hr <= target_hr <= curr_hr) or (prev_hr >= target_hr >= curr_hr):
            # Linear interpolation to find exact crossing time
            if curr_hr != prev_hr:  # Avoid division by zero
                t_prev = time_values[i-1]
                t_curr = time_values[i]
                # Interpolate to find exact crossing time
                t_cross = t_prev + (target_hr - prev_hr) * (t_curr - t_prev) / (curr_hr - prev_hr)
                crossings.append({
                    'time': t_cross,
                    'direction': 'up' if curr_hr > prev_hr else 'down',
                    'hr_before': prev_hr,
                    'hr_after': curr_hr
                })
    
    return {
        'target_hr': target_hr,
        'overall_avg_hr': overall_avg_hr,
        'weighted_avg_hr': weighted_avg_hr if 'n' in df_clean.columns else None,
        'crossings': crossings,
        'total_duration': time_values[-1] if len(time_values) > 0 else 0,
        'data_points': len(df_clean)
    }


def compute_and_plot_aligned_hr_with_convergence(recordings_root: Path, out_root: Path) -> None:
    """
    Compute an across-recordings averaged HR curve aligned by the 4 timestamps
    and add a vertical marker for the first convergence point.
    """
    recs: list[dict] = []
    seg_durations: list[tuple[float, float, float, float, float]] = []

    for rec_dir in sorted([p for p in recordings_root.iterdir() if p.is_dir()]):
        rec_name = rec_dir.name
        hr_csv = rec_dir / 'hr.csv'
        ts_csv = rec_dir / 'timestamps.csv'
        if not hr_csv.exists() or not ts_csv.exists():
            continue
        hr_df = read_signal_csv(hr_csv, 'hr')
        marks = read_marks(ts_csv)
        if hr_df.empty or marks is None or len(marks) != 4:
            continue

        # Determine bounds
        start_ts = hr_df.index.min()
        end_ts = hr_df.index.max()
        a, b, c, d = list(marks)
        # Ensure marks fall within HR bounds
        if not (start_ts < a < b < c < d < end_ts):
            # Skip if pathological
            continue

        # Segment durations in seconds
        pre_d = (a - start_ts).total_seconds()
        d12 = (b - a).total_seconds()
        d23 = (c - b).total_seconds()
        d34 = (d - c).total_seconds()
        post_d = (end_ts - d).total_seconds()
        seg_durations.append((pre_d, d12, d23, d34, post_d))

        # Store for second pass
        recs.append({'name': rec_name, 'hr': hr_df, 'start': start_ts, 'marks': (a, b, c, d), 'end': end_ts})

    if not recs:
        print("No valid recordings found for HR alignment")
        return

    # Median segment durations and normalized cumulative boundaries
    med = np.nanmedian(np.array(seg_durations), axis=0)
    total = float(np.sum(med))
    if total <= 0:
        return
    proportions = med / total
    boundaries = np.concatenate(([0.0], np.cumsum(proportions)))  # length 6, last == 1.0

    # Common grid (normalized [0,1]) and scaled seconds by median total duration
    N = 1000
    x_norm = np.linspace(0.0, 1.0, N)
    x_sec = x_norm * total

    # Helper: piecewise-linear map original timestamp to normalized coordinate
    def map_times_to_norm(hr_df: pd.DataFrame, start: pd.Timestamp, marks: tuple, end: pd.Timestamp) -> tuple[np.ndarray, np.ndarray]:
        a, b, c, d = marks
        segments = [
            (start, a, boundaries[0], boundaries[1]),
            (a, b, boundaries[1], boundaries[2]),
            (b, c, boundaries[2], boundaries[3]),
            (c, d, boundaries[3], boundaries[4]),
            (d, end, boundaries[4], boundaries[5]),
        ]
        u_list = []
        y_list = []
        for seg_start, seg_end, u0, u1 in segments:
            seg = hr_df.loc[(hr_df.index >= seg_start) & (hr_df.index <= seg_end), 'hr']
            if seg.empty:
                continue
            dur_ns = (seg_end.value - seg_start.value)
            if dur_ns <= 0:
                continue
            rel = (seg.index.view('int64') - np.int64(seg_start.value)) / np.float64(dur_ns)
            rel = np.clip(rel.astype(float), 0.0, 1.0)
            u = u0 + rel * (u1 - u0)
            u_list.append(u)
            y_list.append(seg.values.astype(float))
        if not u_list:
            return np.array([]), np.array([])
        u_all = np.concatenate(u_list)
        y_all = np.concatenate(y_list)
        order = np.argsort(u_all)
        u_sorted = u_all[order]
        y_sorted = y_all[order]
        uniq_u, idx_start = np.unique(u_sorted, return_index=True)
        counts = np.diff(np.append(idx_start, len(u_sorted)))
        y_avg = []
        for i, cnt in enumerate(counts):
            start_idx = idx_start[i]
            y_avg.append(np.mean(y_sorted[start_idx:start_idx + cnt]))
        y_avg = np.asarray(y_avg)
        return uniq_u, y_avg

    curves = []
    for rec in recs:
        u, y = map_times_to_norm(rec['hr'], rec['start'], rec['marks'], rec['end'])
        if u.size < 2:
            continue
        y_grid = np.interp(x_norm, u, y, left=np.nan, right=np.nan)
        y_grid[x_norm < u.min()] = np.nan
        y_grid[x_norm > u.max()] = np.nan
        curves.append(y_grid)

    if not curves:
        print("No valid HR curves generated")
        return

    Y = np.vstack(curves)
    n = np.sum(~np.isnan(Y), axis=0)
    marr = np.ma.array(Y, mask=np.isnan(Y))
    mean = marr.mean(axis=0).filled(np.nan)
    std = marr.std(axis=0).filled(np.nan)

    # Create DataFrame for convergence analysis
    hr_df_analysis = pd.DataFrame({
        'normalized_time': x_norm,
        'aligned_seconds': x_sec,
        'hr_mean': mean,
        'hr_std': std,
        'n': n,
    })

    # Find convergence point
    convergence_results = find_hr_convergence(hr_df_analysis)
    first_convergence_time = None
    target_hr = None
    
    if convergence_results and convergence_results['crossings']:
        first_convergence_time = convergence_results['crossings'][0]['time']
        target_hr = convergence_results['target_hr']
        print(f"\nFirst HR convergence found at: {first_convergence_time:.1f} seconds")
        print(f"Target HR (overall average): {target_hr:.3f} BPM")
    else:
        print("\nNo HR convergence found")

    # Plot with convergence marker
    fig, ax = plt.subplots(figsize=(14, 6))
    
    # Main HR plot
    ax.plot(x_sec, mean, color='tab:blue', linewidth=2, label='Mean HR')
    ax.fill_between(x_sec, mean - std, mean + std, color='tab:blue', alpha=0.2, label='±1 SD')
    
    # Segment boundaries (dashed vertical lines)
    for xb in boundaries * total:
        ax.axvline(xb, color='k', linestyle='--', linewidth=0.8, alpha=0.5)
    
    # Target HR line (overall average)
    if target_hr is not None:
        ax.axhline(target_hr, color='red', linestyle=':', linewidth=1.5, alpha=0.7, 
                   label=f'Overall Average ({target_hr:.1f} BPM)')
    
    # Convergence marker
    if first_convergence_time is not None:
        ax.axvline(first_convergence_time, color='red', linestyle='-', linewidth=3, alpha=0.8,
                   label=f'First Convergence ({first_convergence_time:.1f}s)')
        
        # Add annotation
        ax.annotate(f'First Convergence\n{first_convergence_time:.1f}s', 
                   xy=(first_convergence_time, target_hr),
                   xytext=(first_convergence_time + 50, target_hr + 2),
                   arrowprops=dict(arrowstyle='->', color='red', alpha=0.8),
                   fontsize=10, ha='center', va='bottom',
                   bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8))
    
    ax.set_xlabel('Aligned time (s)')
    ax.set_ylabel('HR (bpm)')
    ax.set_title('Aligned Average HR Across Recordings with Convergence Marker')
    ax.grid(True, alpha=0.3)
    ax.legend(loc='upper right')
    fig.tight_layout()
    
    # Save plot
    fig.savefig(out_root / 'HR_average_aligned_with_convergence.png', dpi=150, bbox_inches='tight')
    plt.close(fig)

    # Save CSV data
    out_csv = out_root / 'HR_average_aligned_with_convergence.csv'
    hr_df_analysis.to_csv(out_csv, index=False)
    
    
    print(f"\nHR alignment with convergence analysis complete.")
    print(f"Outputs saved to: {out_root}")
    print(f"- Plot: HR_average_aligned_with_convergence.png")
    print(f"- Data: HR_average_aligned_with_convergence.csv")


def main() -> None:
    """Main function to run the combined HR analysis."""
    root = Path(__file__).resolve().parent
    recordings_root = root / 'SingleRecordings'
    out_root = root / 'Plots' / 'Aggregate'
    ensure_dir(out_root)
    
    print("Combined HR Analysis: Alignment + Convergence Detection")
    print("="*60)
    print(f"Processing recordings from: {recordings_root}")
    print(f"Output directory: {out_root}")
    
    # Count valid recordings
    valid_count = 0
    for rec_dir in sorted([p for p in recordings_root.iterdir() if p.is_dir()]):
        hr_csv = rec_dir / 'hr.csv'
        ts_csv = rec_dir / 'timestamps.csv'
        if hr_csv.exists() and ts_csv.exists():
            marks = read_marks(ts_csv)
            if marks is not None and len(marks) == 4:
                valid_count += 1
    
    print(f"Found {valid_count} recordings with exactly 4 timestamps")
    
    if valid_count == 0:
        print("No valid recordings found. Please check that recordings have exactly 4 timestamps.")
        return
    
    # Run the combined analysis
    compute_and_plot_aligned_hr_with_convergence(recordings_root, out_root)


if __name__ == '__main__':
    main()