# Source code for neurological_lrd_analysis.ml_baselines.inference

"""
Inference System for Pretrained ML Models.

This module provides a high-level interface for using pretrained models
for Hurst exponent estimation, including batch processing, ensemble predictions,
and uncertainty quantification.

Author: Davian R. Chin (PhD Candidate in Biomedical Engineering, University of Reading, UK)
"""

import numpy as np
from typing import Dict, List, Optional, Tuple, Union, Any
from dataclasses import dataclass
import warnings
from pathlib import Path

from .pretrained_models import PretrainedModelManager, ModelMetadata, ModelStatus
from .feature_extraction import TimeSeriesFeatureExtractor
from .ml_estimators import MLBaselineType, BaseMLEstimator


@dataclass
class PredictionResult:
    """
    Result from model prediction.

    Container built by ``PretrainedInference.predict_single`` for one time
    series. Only ``hurst_estimate`` is required; every other field defaults
    to ``None`` so that partially-filled "failed" results can be created.
    """
    # Predicted Hurst exponent; predict_batch / compare_models store NaN here
    # when the prediction for a series or model type fails.
    hurst_estimate: float
    # Copied from the estimator's prediction object by predict_single
    # (presumably ordered (lower, upper) — TODO confirm against the estimator).
    confidence_interval: Optional[Tuple[float, float]] = None
    # Copied from the estimator prediction's ``prediction_uncertainty``.
    uncertainty: Optional[float] = None
    # Identifier of the model that produced the estimate.
    model_id: Optional[str] = None
    # String value of the model's MLBaselineType.
    model_type: Optional[str] = None
    # Elapsed time measured by predict_single around model selection, feature
    # extraction and prediction (seconds, as returned by the ``time`` module).
    prediction_time: Optional[float] = None
    # Copied from the estimator prediction's ``feature_contributions``.
    feature_contributions: Optional[Dict[str, float]] = None


@dataclass
class EnsembleResult:
    """
    Result from ensemble prediction.

    Container built by ``PretrainedInference.predict_ensemble`` summarising
    the predictions of several pretrained models for one time series.
    """
    # Weighted average of the individual model predictions.
    mean_estimate: float
    # Weighted standard deviation of the predictions around ``mean_estimate``.
    std_estimate: float
    # Hurst estimates of the models that predicted successfully.
    individual_predictions: List[float]
    # Weight assigned to each model, keyed by model ID, as stored before
    # normalisation (caller-supplied weight or the model's validation score).
    model_weights: Optional[Dict[str, float]] = None
    # 95% t-distribution interval around the mean; left as None when
    # uncertainty is not requested or only one prediction is available.
    confidence_interval: Optional[Tuple[float, float]] = None


class PretrainedInference:
    """
    High-level interface for pretrained model inference.

    Provides easy-to-use methods for Hurst exponent estimation using pretrained
    ML models with support for single predictions, batch processing, and
    ensemble methods.
    """

    # Keys of the per-model metric dictionaries returned by benchmark_models.
    _METRIC_NAMES = ('mse', 'mae', 'r2', 'correlation', 'rmse')

    def __init__(self, models_dir: Union[str, Path] = "pretrained_models"):
        """
        Initialize the inference system.

        Parameters:
        -----------
        models_dir : str or Path
            Directory containing pretrained models
        """
        self.manager = PretrainedModelManager(models_dir)
        # Not read anywhere in this class; kept so existing attribute access
        # on instances keeps working.
        self._feature_extractor = None
        # Despite the name, this caches feature extractors keyed by the
        # string form of their configuration (see _get_feature_extractor).
        self._model_cache = {}

    def _get_feature_extractor(self, model_metadata: ModelMetadata) -> TimeSeriesFeatureExtractor:
        """Get or create feature extractor with model-specific configuration."""
        config_key = str(model_metadata.feature_extractor_config)
        if config_key not in self._model_cache:
            self._model_cache[config_key] = TimeSeriesFeatureExtractor(
                **model_metadata.feature_extractor_config
            )
        return self._model_cache[config_key]

    def _select_model(self,
                      model_id: Optional[str],
                      model_type: Optional[MLBaselineType],
                      use_best: bool) -> Tuple[Any, ModelMetadata]:
        """Resolve the selection arguments of predict_single to (estimator, metadata)."""
        # An explicit model ID always wins.
        if model_id is not None:
            return self.manager.load_model(model_id)

        if model_type is not None:
            if use_best:
                return self.manager.get_best_model(model_type)
            # Otherwise take the first trained model of the requested type.
            models = self.manager.list_models(model_type=model_type,
                                              status=ModelStatus.TRAINED)
            if not models:
                raise ValueError(f"No trained models of type {model_type.value} found")
            return self.manager.load_model(models[0].model_id)

        # Nothing specified: pick the trained model with the highest
        # validation score; models without a score rank last.
        all_models = self.manager.list_models(status=ModelStatus.TRAINED)
        if not all_models:
            raise ValueError("No trained models available")
        best_model = max(
            all_models,
            key=lambda m: m.performance_metrics.get('validation_score', -np.inf),
        )
        return self.manager.load_model(best_model.model_id)

    def predict_single(self,
                       data: np.ndarray,
                       model_id: Optional[str] = None,
                       model_type: Optional[MLBaselineType] = None,
                       use_best: bool = True) -> PredictionResult:
        """
        Predict Hurst exponent for a single time series.

        Parameters:
        -----------
        data : np.ndarray
            Time series data
        model_id : str, optional
            Specific model ID to use
        model_type : MLBaselineType, optional
            Type of model to use (will select best if multiple available)
        use_best : bool
            Whether to use the best performing model if model_id not specified

        Returns:
        --------
        PredictionResult
            Prediction result with metadata

        Raises:
        -------
        ValueError
            If no trained model matching the selection is available
        """
        import time

        # perf_counter is monotonic, so the elapsed time cannot go negative
        # or jump when the system clock is adjusted.
        start_time = time.perf_counter()

        estimator, metadata = self._select_model(model_id, model_type, use_best)

        # Extract features with the extractor configured for this model
        feature_extractor = self._get_feature_extractor(metadata)
        features = feature_extractor.extract_features(data)

        # The combined feature vector is passed as a single-row 2-D array
        prediction = estimator.predict(features.combined.reshape(1, -1))

        prediction_time = time.perf_counter() - start_time

        return PredictionResult(
            hurst_estimate=prediction.hurst_estimate,
            confidence_interval=prediction.confidence_interval,
            uncertainty=prediction.prediction_uncertainty,
            model_id=metadata.model_id,
            model_type=metadata.model_type.value,
            prediction_time=prediction_time,
            feature_contributions=prediction.feature_contributions
        )

    def predict_batch(self,
                      data_list: List[np.ndarray],
                      model_id: Optional[str] = None,
                      model_type: Optional[MLBaselineType] = None,
                      use_best: bool = True,
                      show_progress: bool = True) -> List[PredictionResult]:
        """
        Predict Hurst exponents for multiple time series.

        A failure on one series does not abort the batch: a warning is issued
        and a result with a NaN estimate is stored at that position, so the
        output always has one entry per input series.

        Parameters:
        -----------
        data_list : List[np.ndarray]
            List of time series data
        model_id : str, optional
            Specific model ID to use
        model_type : MLBaselineType, optional
            Type of model to use
        use_best : bool
            Whether to use the best performing model
        show_progress : bool
            Whether to show progress during batch processing

        Returns:
        --------
        List[PredictionResult]
            List of prediction results
        """
        results = []

        for i, data in enumerate(data_list):
            # Report every tenth series to keep the output short
            if show_progress and i % 10 == 0:
                print(f"Processing {i+1}/{len(data_list)} time series...")

            try:
                result = self.predict_single(data, model_id, model_type, use_best)
            except Exception as e:
                # Deliberate best-effort: keep going and record a failed result
                warnings.warn(f"Failed to predict for time series {i}: {e}")
                result = PredictionResult(
                    hurst_estimate=np.nan,
                    model_id=model_id,
                    model_type=model_type.value if model_type else None
                )
            results.append(result)

        if show_progress:
            print(f"Completed batch prediction: {len(results)} results")

        return results

    @staticmethod
    def _normalize_weights(raw_weights: List[Any]) -> np.ndarray:
        """Turn raw ensemble weights into non-negative weights that sum to one."""
        weights_array = np.asarray(raw_weights, dtype=float)
        # Negative or non-finite weights (e.g. a negative validation score)
        # would make the weighted variance meaningless, so they count as zero.
        usable = np.isfinite(weights_array) & (weights_array > 0)
        weights_array = np.where(usable, weights_array, 0.0)

        total = weights_array.sum()
        if total > 0:
            return weights_array / total
        # No usable weight at all: fall back to a plain average
        return np.full(weights_array.shape, 1.0 / weights_array.size)

    def predict_ensemble(self,
                         data: np.ndarray,
                         model_types: Optional[List[MLBaselineType]] = None,
                         weights: Optional[Dict[str, float]] = None,
                         include_uncertainty: bool = True) -> EnsembleResult:
        """
        Predict using ensemble of models.

        Every trained model of the requested types is queried; models that
        fail are skipped with a warning. The estimates are combined with a
        weighted average.

        Parameters:
        -----------
        data : np.ndarray
            Time series data
        model_types : List[MLBaselineType], optional
            Types of models to include in ensemble
        weights : Dict[str, float], optional
            Weights for each model type, keyed by the model type's value.
            Models whose type is not listed are weighted by their validation
            score. Negative or non-finite weights are treated as zero.
        include_uncertainty : bool
            Whether to include uncertainty quantification

        Returns:
        --------
        EnsembleResult
            Ensemble prediction result. ``model_weights`` holds the raw
            (un-normalized) weight of each model that predicted successfully.

        Raises:
        -------
        ValueError
            If no trained model is available or every model failed
        """
        if model_types is None:
            model_types = list(MLBaselineType)

        # Get available models
        available_models = []
        for model_type in model_types:
            models = self.manager.list_models(model_type=model_type,
                                              status=ModelStatus.TRAINED)
            if models:
                available_models.extend(models)

        if not available_models:
            raise ValueError("No trained models available for ensemble")

        # Predictions and their weights are collected in lock-step so that a
        # skipped model can never shift weights onto the wrong prediction.
        predictions = []
        raw_weights = []
        model_weights = {}

        for model in available_models:
            try:
                result = self.predict_single(data, model_id=model.model_id)
                if weights and model.model_type.value in weights:
                    weight = weights[model.model_type.value]
                else:
                    # Use validation score as weight
                    weight = model.performance_metrics.get('validation_score', 0.0)
            except Exception as e:
                warnings.warn(f"Failed to predict with model {model.model_id}: {e}")
                continue

            predictions.append(result.hurst_estimate)
            raw_weights.append(weight)
            model_weights[model.model_id] = weight

        if not predictions:
            raise ValueError("No successful predictions for ensemble")

        predictions = np.array(predictions, dtype=float)
        weights_array = self._normalize_weights(raw_weights)

        # Calculate ensemble statistics
        mean_estimate = float(np.average(predictions, weights=weights_array))
        variance = np.average((predictions - mean_estimate) ** 2, weights=weights_array)
        std_estimate = float(np.sqrt(variance))

        # Calculate confidence interval
        confidence_interval = None
        if include_uncertainty and len(predictions) > 1:
            # Use t-distribution for confidence interval
            from scipy import stats

            alpha = 0.05
            n = len(predictions)
            t_val = stats.t.ppf(1 - alpha / 2, n - 1)
            margin = float(t_val * std_estimate / np.sqrt(n))
            confidence_interval = (mean_estimate - margin, mean_estimate + margin)

        return EnsembleResult(
            mean_estimate=mean_estimate,
            std_estimate=std_estimate,
            individual_predictions=predictions.tolist(),
            model_weights=model_weights,
            confidence_interval=confidence_interval
        )

    def compare_models(self,
                       data: np.ndarray,
                       model_types: Optional[List[MLBaselineType]] = None) -> Dict[str, PredictionResult]:
        """
        Compare predictions from different model types.

        Parameters:
        -----------
        data : np.ndarray
            Time series data
        model_types : List[MLBaselineType], optional
            Types of models to compare

        Returns:
        --------
        Dict[str, PredictionResult]
            Predictions from each model type, keyed by the model type's value.
            A type whose prediction fails maps to a result with a NaN estimate.
        """
        if model_types is None:
            model_types = list(MLBaselineType)

        results = {}

        for model_type in model_types:
            try:
                results[model_type.value] = self.predict_single(
                    data, model_type=model_type, use_best=True
                )
            except Exception as e:
                warnings.warn(f"Failed to predict with {model_type.value}: {e}")
                results[model_type.value] = PredictionResult(
                    hurst_estimate=np.nan,
                    model_type=model_type.value
                )

        return results

    def get_model_info(self, model_id: Optional[str] = None) -> Union[ModelMetadata, List[ModelMetadata]]:
        """
        Get information about available models.

        Parameters:
        -----------
        model_id : str, optional
            Specific model ID, or None for all models

        Returns:
        --------
        Union[ModelMetadata, List[ModelMetadata]]
            Metadata of the requested model, or the list of all trained
            models when no ID is given

        Raises:
        -------
        ValueError
            If model_id is given but not registered
        """
        if model_id is None:
            return self.manager.list_models(status=ModelStatus.TRAINED)

        # NOTE(review): this reads the manager's private registry directly;
        # prefer a public lookup on PretrainedModelManager if one exists.
        registry = self.manager._metadata_registry
        if model_id not in registry:
            raise ValueError(f"Model {model_id} not found")
        return registry[model_id]

    @staticmethod
    def _regression_metrics(predictions: np.ndarray, true_values: np.ndarray) -> Dict[str, float]:
        """Compute MSE, MAE, R², Pearson correlation and RMSE for equal-length 1-D arrays."""
        if predictions.size == 0:
            return dict.fromkeys(PretrainedInference._METRIC_NAMES, float('nan'))

        errors = predictions - true_values
        mse = float(np.mean(errors ** 2))
        mae = float(np.mean(np.abs(errors)))

        # R² and correlation are undefined when the targets (or predictions)
        # do not vary; report NaN instead of dividing by zero.
        target_variance = float(np.var(true_values))
        r2 = 1.0 - mse / target_variance if target_variance > 0 else float('nan')
        if predictions.size > 1 and target_variance > 0 and np.std(predictions) > 0:
            correlation = float(np.corrcoef(predictions, true_values)[0, 1])
        else:
            correlation = float('nan')

        return {
            'mse': mse,
            'mae': mae,
            'r2': float(r2),
            'correlation': correlation,
            'rmse': float(np.sqrt(mse))
        }

    def benchmark_models(self,
                         test_data: List[np.ndarray],
                         true_hurst: List[float],
                         model_types: Optional[List[MLBaselineType]] = None) -> Dict[str, Dict[str, float]]:
        """
        Benchmark model performance on test data.

        A model type that cannot be benchmarked (including when the inputs
        are inconsistent) is reported with NaN metrics and a warning rather
        than raising.

        Parameters:
        -----------
        test_data : List[np.ndarray]
            Test time series data
        true_hurst : List[float]
            True Hurst exponents, one per entry of test_data
        model_types : List[MLBaselineType], optional
            Types of models to benchmark

        Returns:
        --------
        Dict[str, Dict[str, float]]
            Performance metrics ('mse', 'mae', 'r2', 'correlation', 'rmse')
            for each model type, keyed by the model type's value
        """
        if model_types is None:
            model_types = list(MLBaselineType)

        results = {}

        for model_type in model_types:
            try:
                # Checked before predicting: a length-1 true_hurst would
                # otherwise broadcast silently and yield wrong metrics.
                if len(test_data) != len(true_hurst):
                    raise ValueError(
                        "test_data and true_hurst must have the same length "
                        f"(got {len(test_data)} and {len(true_hurst)})"
                    )
                true_values = np.asarray(true_hurst, dtype=float)

                # Get predictions
                predictions = np.array(
                    [
                        self.predict_single(series, model_type=model_type,
                                            use_best=True).hurst_estimate
                        for series in test_data
                    ],
                    dtype=float,
                )

                results[model_type.value] = self._regression_metrics(predictions, true_values)

            except Exception as e:
                warnings.warn(f"Failed to benchmark {model_type.value}: {e}")
                results[model_type.value] = dict.fromkeys(self._METRIC_NAMES, np.nan)

        return results
def quick_predict(data: np.ndarray,
                  models_dir: Union[str, Path] = "pretrained_models",
                  model_type: Optional[MLBaselineType] = None) -> float:
    """
    One-call convenience wrapper for a single Hurst estimate.

    Builds a fresh ``PretrainedInference`` on ``models_dir``, runs
    ``predict_single`` on the series and returns only the point estimate.

    Parameters:
    -----------
    data : np.ndarray
        Time series data
    models_dir : str or Path
        Directory containing pretrained models
    model_type : MLBaselineType, optional
        Type of model to use

    Returns:
    --------
    float
        Predicted Hurst exponent
    """
    return (
        PretrainedInference(models_dir)
        .predict_single(data, model_type=model_type)
        .hurst_estimate
    )
def quick_ensemble_predict(data: np.ndarray,
                           models_dir: Union[str, Path] = "pretrained_models",
                           model_types: Optional[List[MLBaselineType]] = None) -> Tuple[float, float]:
    """
    One-call convenience wrapper for an ensemble Hurst estimate.

    Builds a fresh ``PretrainedInference`` on ``models_dir``, runs
    ``predict_ensemble`` on the series and returns the summary statistics.

    Parameters:
    -----------
    data : np.ndarray
        Time series data
    models_dir : str or Path
        Directory containing pretrained models
    model_types : List[MLBaselineType], optional
        Types of models to include in ensemble

    Returns:
    --------
    Tuple[float, float]
        (mean_estimate, std_estimate)
    """
    ensemble = PretrainedInference(models_dir).predict_ensemble(
        data, model_types=model_types
    )
    return (ensemble.mean_estimate, ensemble.std_estimate)