Source code for neurological_lrd_analysis.benchmark_core.biomedical_scenarios

"""
Biomedical scenario-based time series data generation.

This module provides realistic biomedical time series generators (EEG, ECG, etc.)
with appropriate contamination methods relevant to real biomedical data.
"""

from dataclasses import dataclass
from typing import List, Optional, Tuple

import numpy as np

from .generation import TimeSeriesSample, fbm_davies_harte


[docs] @dataclass class BiomedicalScenario: """Configuration for biomedical time series scenarios.""" scenario_type: str # 'eeg', 'ecg', 'emg', 'respiratory', 'blood_pressure' sampling_rate: float # Hz duration: float # seconds hurst_range: Tuple[float, float] # Expected Hurst range for this scenario typical_amplitude: float # Typical signal amplitude noise_level: float # Typical noise level (relative to signal) artifact_probability: float # Probability of artifacts
[docs] def generate_eeg_scenario( n: int, hurst: float, scenario: str = "rest", contamination_level: float = 0.1, seed: Optional[int] = None, ) -> np.ndarray: """ Generate realistic EEG time series data. Parameters: ----------- n : int Length of the time series hurst : float Hurst exponent (0 < H < 1) scenario : str EEG scenario ('rest', 'eyes_closed', 'eyes_open', 'sleep', 'seizure') contamination_level : float Level of contamination/artifacts seed : int, optional Random seed for reproducibility Returns: -------- np.ndarray EEG-like time series data """ if seed is not None: np.random.seed(seed) if not (0 < hurst < 1): raise ValueError(f"Hurst exponent must be in (0, 1), got {hurst}") # Generate base fBm signal base_signal = fbm_davies_harte(n, hurst, seed) # Add EEG-specific characteristics based on scenario if scenario == "rest": # Resting state: alpha waves (8-12 Hz) with some theta alpha_freq = 10.0 # Hz theta_freq = 6.0 # Hz elif scenario == "eyes_closed": # Eyes closed: stronger alpha waves alpha_freq = 9.5 # Hz theta_freq = 5.0 # Hz elif scenario == "eyes_open": # Eyes open: reduced alpha, more beta alpha_freq = 8.0 # Hz theta_freq = 6.0 # Hz elif scenario == "sleep": # Sleep: delta waves (0.5-4 Hz) and theta alpha_freq = 2.0 # Hz theta_freq = 1.5 # Hz elif scenario == "seizure": # Seizure: high-frequency activity alpha_freq = 20.0 # Hz theta_freq = 15.0 # Hz elif scenario == "parkinsonian": # Parkinson's disease: reduced alpha, increased beta (13-30 Hz), tremor alpha_freq = 6.0 # Reduced alpha theta_freq = 18.0 # Beta activity elif scenario == "epileptic": # Epilepsy: irregular patterns, spikes, high-frequency bursts alpha_freq = 15.0 # Irregular theta_freq = 25.0 # High-frequency bursts else: raise ValueError(f"Unknown EEG scenario: {scenario}") # Generate time vector (assuming 250 Hz sampling rate) fs = 250.0 t = np.arange(n) / fs # Add rhythmic components (simulating brain waves) alpha_component = 0.3 * np.sin(2 * np.pi * alpha_freq * t) theta_component = 0.2 * np.sin(2 * np.pi * theta_freq * t) # Combine base signal with rhythmic components eeg_signal = base_signal + alpha_component + theta_component # Add EEG-specific artifacts if contamination level > 0 if contamination_level > 0: eeg_signal = add_eeg_artifacts(eeg_signal, contamination_level, seed) # Normalize to typical EEG amplitude (50-100 μV) eeg_signal = eeg_signal * 75.0 # Scale to ~75 μV return eeg_signal
[docs] def generate_ecg_scenario( n: int, hurst: float, heart_rate: float = 70.0, contamination_level: float = 0.1, seed: Optional[int] = None, ) -> np.ndarray: """ Generate realistic ECG time series data. Parameters: ----------- n : int Length of the time series hurst : float Hurst exponent (0 < H < 1) heart_rate : float Heart rate in BPM contamination_level : float Level of contamination/artifacts seed : int, optional Random seed for reproducibility Returns: -------- np.ndarray ECG-like time series data """ if seed is not None: np.random.seed(seed) if not (0 < hurst < 1): raise ValueError(f"Hurst exponent must be in (0, 1), got {hurst}") # Generate base fBm signal base_signal = fbm_davies_harte(n, hurst, seed) # Generate time vector (assuming 250 Hz sampling rate) fs = 250.0 t = np.arange(n) / fs # Generate ECG-like waveform ecg_signal = np.zeros(n) # Calculate RR interval rr_interval = 60.0 / heart_rate # seconds # Generate QRS complexes for i in range(int(t[-1] / rr_interval) + 1): # QRS complex timing qrs_time = i * rr_interval if 0 <= qrs_time <= t[-1]: # Find closest time index qrs_idx = int(qrs_time * fs) if 0 <= qrs_idx < n: # Add QRS complex (simplified) qrs_width = int(0.08 * fs) # 80ms QRS width start_idx = max(0, qrs_idx - qrs_width // 2) end_idx = min(n, qrs_idx + qrs_width // 2) # QRS complex shape (simplified) qrs_shape = np.exp( -( ( (np.arange(end_idx - start_idx) - qrs_width // 2) / (qrs_width // 4) ) ** 2 ) ) ecg_signal[start_idx:end_idx] += 2.0 * qrs_shape # Add P and T waves (simplified) for i in range(int(t[-1] / rr_interval) + 1): rr_time = i * rr_interval if 0 <= rr_time <= t[-1]: rr_idx = int(rr_time * fs) # P wave (before QRS) p_time = rr_time - 0.15 # 150ms before QRS if p_time >= 0: p_idx = int(p_time * fs) if 0 <= p_idx < n: p_width = int(0.08 * fs) start_idx = max(0, p_idx - p_width // 2) end_idx = min(n, p_idx + p_width // 2) p_shape = np.exp( -( ( (np.arange(end_idx - start_idx) - p_width // 2) / (p_width // 3) ) ** 2 ) ) ecg_signal[start_idx:end_idx] += 0.3 * p_shape # T wave (after QRS) t_time = rr_time + 0.25 # 250ms after QRS if t_time <= t[-1]: t_idx = int(t_time * fs) if 0 <= t_idx < n: t_width = int(0.12 * fs) start_idx = max(0, t_idx - t_width // 2) end_idx = min(n, t_idx + t_width // 2) t_shape = np.exp( -( ( (np.arange(end_idx - start_idx) - t_width // 2) / (t_width // 3) ) ** 2 ) ) ecg_signal[start_idx:end_idx] += 0.4 * t_shape # Combine with base signal ecg_signal = ecg_signal + 0.1 * base_signal # Add ECG-specific artifacts if contamination level > 0 if contamination_level > 0: ecg_signal = add_ecg_artifacts(ecg_signal, contamination_level, seed) # Normalize to typical ECG amplitude (1-5 mV) ecg_signal = ecg_signal * 2.0 # Scale to ~2 mV return ecg_signal
[docs] def generate_respiratory_scenario( n: int, hurst: float, breathing_rate: float = 15.0, contamination_level: float = 0.1, seed: Optional[int] = None, ) -> np.ndarray: """ Generate realistic respiratory time series data. Parameters: ----------- n : int Length of the time series hurst : float Hurst exponent (0 < H < 1) breathing_rate : float Breathing rate in breaths per minute contamination_level : float Level of contamination/artifacts seed : int, optional Random seed for reproducibility Returns: -------- np.ndarray Respiratory-like time series data """ if seed is not None: np.random.seed(seed) if not (0 < hurst < 1): raise ValueError(f"Hurst exponent must be in (0, 1), got {hurst}") # Generate base fBm signal base_signal = fbm_davies_harte(n, hurst, seed) # Generate time vector (assuming 10 Hz sampling rate for respiratory) fs = 10.0 t = np.arange(n) / fs # Generate breathing pattern breathing_period = 60.0 / breathing_rate # seconds per breath breathing_signal = np.sin(2 * np.pi * t / breathing_period) # Add some irregularity to breathing irregularity = 0.1 * np.sin(2 * np.pi * 0.1 * t) # Slow variation in breathing rate breathing_signal = np.sin(2 * np.pi * t / breathing_period + irregularity) # Combine with base signal respiratory_signal = breathing_signal + 0.2 * base_signal # Add respiratory-specific artifacts if contamination level > 0 if contamination_level > 0: respiratory_signal = add_respiratory_artifacts( respiratory_signal, contamination_level, seed ) # Normalize to typical respiratory amplitude respiratory_signal = respiratory_signal * 0.5 # Scale to reasonable amplitude return respiratory_signal
[docs] def add_eeg_artifacts( data: np.ndarray, contamination_level: float, seed: Optional[int] = None ) -> np.ndarray: """ Add EEG-specific artifacts to the signal. Parameters: ----------- data : np.ndarray Original EEG signal contamination_level : float Level of contamination seed : int, optional Random seed for reproducibility Returns: -------- np.ndarray EEG signal with artifacts """ if seed is not None: np.random.seed(seed) contaminated = data.copy() n = len(data) # Eye movement artifacts (EOG) if np.random.random() < contamination_level: n_eye_movements = int(contamination_level * 10) # Few eye movements for _ in range(n_eye_movements): if n <= 100: continue # Skip artifacts for very short signals start_idx = np.random.randint(0, n - 100) duration = np.random.randint(50, 200) end_idx = min(n, start_idx + duration) # Eye movement artifact (slow, large amplitude) eye_artifact = np.linspace( 0, np.random.choice([-1, 1]) * 200, end_idx - start_idx ) contaminated[start_idx:end_idx] += eye_artifact # Muscle artifacts (EMG) if np.random.random() < contamination_level * 0.5: n_muscle_artifacts = int(contamination_level * 20) for _ in range(n_muscle_artifacts): start_idx = np.random.randint(0, n - 50) duration = np.random.randint(10, 100) end_idx = min(n, start_idx + duration) # Muscle artifact (high frequency, random amplitude) muscle_artifact = np.random.randn(end_idx - start_idx) * 50 contaminated[start_idx:end_idx] += muscle_artifact # Electrode artifacts (sudden jumps) if np.random.random() < contamination_level * 0.3: n_electrode_artifacts = int(contamination_level * 5) for _ in range(n_electrode_artifacts): idx = np.random.randint(0, n) jump_amplitude = np.random.choice([-1, 1]) * 100 contaminated[idx:] += jump_amplitude # Persistent offset return contaminated
[docs] def add_ecg_artifacts( data: np.ndarray, contamination_level: float, seed: Optional[int] = None ) -> np.ndarray: """ Add ECG-specific artifacts to the signal. Parameters: ----------- data : np.ndarray Original ECG signal contamination_level : float Level of contamination seed : int, optional Random seed for reproducibility Returns: -------- np.ndarray ECG signal with artifacts """ if seed is not None: np.random.seed(seed) contaminated = data.copy() n = len(data) # Baseline wander (slow drift) if np.random.random() < contamination_level: # Generate slow sinusoidal drift t = np.arange(n) drift_freq = 0.5 # Very low frequency baseline_wander = 0.5 * np.sin(2 * np.pi * drift_freq * t / 250.0) contaminated += baseline_wander # Muscle noise (high frequency) if np.random.random() < contamination_level * 0.7: # Add high-frequency noise muscle_noise = np.random.randn(n) * 0.1 contaminated += muscle_noise # Motion artifacts (sudden changes) if np.random.random() < contamination_level * 0.4: n_motion_artifacts = int(contamination_level * 10) for _ in range(n_motion_artifacts): start_idx = np.random.randint(0, n - 100) duration = np.random.randint(20, 200) end_idx = min(n, start_idx + duration) # Motion artifact (sudden change in amplitude) motion_artifact = np.random.choice([-0.5, 0.5]) * np.ones( end_idx - start_idx ) contaminated[start_idx:end_idx] += motion_artifact return contaminated
[docs] def add_respiratory_artifacts( data: np.ndarray, contamination_level: float, seed: Optional[int] = None ) -> np.ndarray: """ Add respiratory-specific artifacts to the signal. Parameters: ----------- data : np.ndarray Original respiratory signal contamination_level : float Level of contamination seed : int, optional Random seed for reproducibility Returns: -------- np.ndarray Respiratory signal with artifacts """ if seed is not None: np.random.seed(seed) contaminated = data.copy() n = len(data) # Cough artifacts (sudden spikes) if np.random.random() < contamination_level * 0.3: n_coughs = int(contamination_level * 5) for _ in range(n_coughs): idx = np.random.randint(0, n - 50) duration = np.random.randint(5, 20) end_idx = min(n, idx + duration) # Cough artifact (sudden spike) cough_artifact = ( np.exp(-np.arange(end_idx - idx) / 5) * np.random.choice([-1, 1]) * 2 ) contaminated[idx:end_idx] += cough_artifact # Speaking artifacts (irregular breathing) if np.random.random() < contamination_level * 0.5: # Add irregular breathing patterns irregular_component = 0.3 * np.random.randn(n) contaminated += irregular_component # Movement artifacts (baseline shifts) if np.random.random() < contamination_level * 0.4: n_movements = int(contamination_level * 3) for _ in range(n_movements): idx = np.random.randint(0, n - 100) shift_amplitude = np.random.choice([-0.5, 0.5]) contaminated[idx:] += shift_amplitude return contaminated
[docs] def generate_biomedical_scenario( scenario_type: str, n: int, hurst: float, contamination_level: float = 0.1, auto_contamination: Optional[str] = None, seed: Optional[int] = None, **kwargs, ) -> np.ndarray: """ Generate biomedical time series data for a specific scenario. Parameters: ----------- scenario_type : str Type of biomedical scenario ('eeg', 'ecg', 'respiratory') n : int Length of the time series hurst : float Hurst exponent contamination_level : float Level of contamination/artifacts auto_contamination : str, optional Automatic contamination type to apply (e.g., 'parkinsonian_tremor', 'epileptic_spike') **kwargs Additional parameters for specific scenarios Returns: -------- np.ndarray Biomedical time series data """ if scenario_type == "eeg": eeg_scenario = kwargs.get("eeg_scenario", "rest") data = generate_eeg_scenario(n, hurst, eeg_scenario, contamination_level, seed) # Apply automatic contamination if specified if auto_contamination: from .generation import add_contamination data = add_contamination(data, auto_contamination, contamination_level) return data elif scenario_type == "ecg": heart_rate = kwargs.get("heart_rate", 70.0) data = generate_ecg_scenario(n, hurst, heart_rate, contamination_level, seed) # Apply automatic contamination if specified if auto_contamination: from .generation import add_contamination data = add_contamination(data, auto_contamination, contamination_level) return data elif scenario_type == "respiratory": breathing_rate = kwargs.get("breathing_rate", 15.0) data = generate_respiratory_scenario( n, hurst, breathing_rate, contamination_level, seed ) # Apply automatic contamination if specified if auto_contamination: from .generation import add_contamination data = add_contamination(data, auto_contamination, contamination_level) return data else: raise ValueError(f"Unknown biomedical scenario: {scenario_type}")
[docs] def generate_biomedical_grid( scenario_type: str, hurst_values: List[float], lengths: List[int], contamination_levels: List[float], **kwargs, ) -> List[TimeSeriesSample]: """ Generate a grid of biomedical time series samples. Parameters: ----------- scenario_type : str Type of biomedical scenario hurst_values : List[float] List of Hurst exponents lengths : List[int] List of time series lengths contamination_levels : List[float] List of contamination levels **kwargs Additional parameters for specific scenarios Returns: -------- List[TimeSeriesSample] List of biomedical time series samples """ samples = [] sample_id = 0 for hurst in hurst_values: for length in lengths: for contamination_level in contamination_levels: # Generate biomedical time series data = generate_biomedical_scenario( scenario_type, length, hurst, contamination_level, **kwargs ) # Create sample sample = TimeSeriesSample( data=data, true_hurst=hurst, length=length, contamination=f"{scenario_type}_contamination_{contamination_level}", seed=sample_id, ) samples.append(sample) sample_id += 1 return samples
# Predefined biomedical scenarios BIOMEDICAL_SCENARIOS = { "eeg_rest": { "scenario_type": "eeg", "eeg_scenario": "rest", "hurst_range": (0.5, 0.8), "typical_amplitude": 75.0, "noise_level": 0.1, }, "eeg_eyes_closed": { "scenario_type": "eeg", "eeg_scenario": "eyes_closed", "hurst_range": (0.6, 0.9), "typical_amplitude": 80.0, "noise_level": 0.08, }, "eeg_sleep": { "scenario_type": "eeg", "eeg_scenario": "sleep", "hurst_range": (0.7, 0.95), "typical_amplitude": 100.0, "noise_level": 0.05, }, "eeg_parkinsonian": { "scenario_type": "eeg", "eeg_scenario": "parkinsonian", "hurst_range": (0.3, 0.6), "typical_amplitude": 120.0, "noise_level": 0.2, "auto_contamination": "parkinsonian_tremor", }, "eeg_epileptic": { "scenario_type": "eeg", "eeg_scenario": "epileptic", "hurst_range": (0.2, 0.5), "typical_amplitude": 150.0, "noise_level": 0.25, "auto_contamination": "epileptic_spike", }, "eeg_parkinsonian_avalanche": { "scenario_type": "eeg", "eeg_scenario": "parkinsonian", "hurst_range": (0.2, 0.4), "typical_amplitude": 140.0, "noise_level": 0.3, "auto_contamination": "neural_avalanche", }, "eeg_epileptic_heavy_tail": { "scenario_type": "eeg", "eeg_scenario": "epileptic", "hurst_range": (0.1, 0.4), "typical_amplitude": 180.0, "noise_level": 0.35, "auto_contamination": "heavy_tail", }, "eeg_burst_suppression": { "scenario_type": "eeg", "eeg_scenario": "sleep", "hurst_range": (0.4, 0.7), "typical_amplitude": 90.0, "noise_level": 0.15, "auto_contamination": "burst_suppression", }, "ecg_normal": { "scenario_type": "ecg", "heart_rate": 70.0, "hurst_range": (0.3, 0.6), "typical_amplitude": 2.0, "noise_level": 0.1, }, "ecg_tachycardia": { "scenario_type": "ecg", "heart_rate": 120.0, "hurst_range": (0.2, 0.5), "typical_amplitude": 2.5, "noise_level": 0.15, }, "respiratory_rest": { "scenario_type": "respiratory", "breathing_rate": 15.0, "hurst_range": (0.4, 0.7), "typical_amplitude": 0.5, "noise_level": 0.1, }, }