"""
Silero VAD wrapper using onnx-asr library
"""
import numpy as np
import onnx_asr
from typing import Optional, Tuple
import logging

logger = logging.getLogger(__name__)


class SileroVAD:
    """
    Thin wrapper around the Silero VAD model loaded through onnx-asr.

    The instance holds the loaded model plus a set of VAD tuning values.
    Within this class the tuning values are only stored as attributes; no
    method here forwards them to the model. The direct-detection methods
    are stubs that log a warning and return a fixed "no speech" result.
    """

    def __init__(
        self,
        providers: Optional[list] = None,
        threshold: float = 0.5,
        min_speech_duration_ms: int = 250,
        min_silence_duration_ms: int = 100,
        window_size_samples: int = 512,
        speech_pad_ms: int = 30,
    ):
        """
        Load the Silero VAD model and remember the VAD settings.

        Args:
            providers: ONNX runtime providers to hand to the loader. When
                None, the list "CUDAExecutionProvider" followed by
                "CPUExecutionProvider" is used.
            threshold: Speech probability threshold (0.0-1.0)
            min_speech_duration_ms: Minimum duration of a speech segment
            min_silence_duration_ms: Minimum silence duration that splits
                segments
            window_size_samples: Window size for VAD processing
            speech_pad_ms: Padding around speech segments
        """
        # Only None selects the default list; any list the caller supplies
        # (including an empty one) is forwarded unchanged.
        chosen_providers = (
            ["CUDAExecutionProvider", "CPUExecutionProvider"]
            if providers is None
            else providers
        )

        logger.info("Loading Silero VAD model...")
        self.vad = onnx_asr.load_vad("silero", providers=chosen_providers)

        # Tuning values are kept on the instance for callers to read; this
        # class itself never passes them to the model.
        self.speech_pad_ms = speech_pad_ms
        self.window_size_samples = window_size_samples
        self.min_silence_duration_ms = min_silence_duration_ms
        self.min_speech_duration_ms = min_speech_duration_ms
        self.threshold = threshold

        logger.info("Silero VAD initialized successfully")

    def detect_speech(
        self,
        audio: np.ndarray,
        sample_rate: int = 16000,
    ) -> list:
        """
        Stub for direct speech-segment detection.

        No detection is performed here. The call logs a warning that
        recommends model.with_vad() and returns an empty list; neither
        argument is read.

        Args:
            audio: Audio data as numpy array (float32)
            sample_rate: Sample rate of audio

        Returns:
            Always an empty list. The intended element shape is a tuple
            (start_sample, end_sample) per speech segment.
        """
        # Kept only as a compatibility interface: segmentation is expected
        # to happen through the recognition pipeline (model.with_vad()).
        logger.warning("Direct VAD detection - consider using model.with_vad() instead")
        no_segments: list = []
        return no_segments

    def is_speech(
        self,
        audio_chunk: np.ndarray,
        sample_rate: int = 16000,
    ) -> Tuple[bool, float]:
        """
        Stub for a per-chunk speech check.

        The chunk is not analysed. The call logs a warning and reports
        "no speech" with probability zero; neither argument is read.

        Args:
            audio_chunk: Audio chunk as numpy array (float32)
            sample_rate: Sample rate

        Returns:
            Tuple of (is_speech: bool, probability: float), always
            (False, 0.0).
        """
        # Placeholder: automatic segmentation is meant to go through
        # model.with_vad() rather than this method.
        logger.warning("Direct speech detection not implemented - use model.with_vad()")
        verdict = (False, 0.0)
        return verdict

    def get_vad(self):
        """
        Expose the wrapped onnx_asr VAD model.

        Returns:
            The object produced by onnx_asr.load_vad() during construction
        """
        underlying_model = self.vad
        return underlying_model


# Convenience function
def load_vad(**kwargs):
    """Construct a SileroVAD, forwarding every keyword argument unchanged."""
    vad_wrapper = SileroVAD(**kwargs)
    return vad_wrapper