sounddevice 기반 마이크 오디오 녹음기 — 콜백 레벨 모니터링 포함

02-reusable-code-python/utils/audio_recorder.py
reusable
python
"""
sounddevice 기반 마이크 오디오 녹음기 — 콜백 레벨 모니터링 포함

@source: 260313 voice-to-text-v2
@extracted: 2026-03-14
@description: sounddevice를 사용한 실시간 마이크 녹음. WAV 바이트 출력,
              오디오 레벨 콜백(시각화용), numpy 배열 변환 지원.

의존성:
    - sounddevice (pip install sounddevice)
    - numpy

사용법:
    from utils.audio_recorder import AudioRecorder

    recorder = AudioRecorder(sample_rate=16000, channels=1)

    # 오디오 레벨 콜백 (파형 시각화용)
    recorder.set_level_callback(lambda level: print(f"Level: {level:.4f}"))

    recorder.start()
    # ... 녹음 중 ...
    wav_bytes = recorder.stop()  # WAV 바이트 반환

    # numpy 배열로 변환
    audio_array = recorder.get_audio_data()

    # 녹음 시간 확인
    print(f"Duration: {recorder.get_duration():.1f}s")
"""

import io
import wave
import logging
from typing import Optional, Callable

import numpy as np

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# sounddevice is an optional dependency. Importing this module must never
# fail because of it; AUDIO_AVAILABLE records whether recording is possible
# and `sd` is bound to None when it is not.
try:
    import sounddevice as sd
    AUDIO_AVAILABLE = True
except ImportError:
    # The Python package itself is not installed.
    sd = None
    AUDIO_AVAILABLE = False
    logger.warning("sounddevice 미설치. 오디오 녹음 불가. pip install sounddevice")
except OSError as exc:
    # The package is installed but sounddevice could not load the native
    # PortAudio library; it reports that with OSError at import time.
    sd = None
    AUDIO_AVAILABLE = False
    logger.warning("PortAudio 라이브러리 로드 실패. 오디오 녹음 불가: %s", exc)

# Default settings used by AudioRecorder when the caller gives no arguments.
DEFAULT_SAMPLE_RATE = 16000  # passed to the input stream as `samplerate`
DEFAULT_CHANNELS = 1         # mono
DEFAULT_CHUNK_SIZE = 1024    # passed to the input stream as `blocksize`


class AudioRecorder:
    """
    Microphone recorder built on sounddevice.

    Features:
        - Callback-driven capture: sounddevice invokes ``_audio_callback``
          for every captured block while the stream is running
        - Optional real-time level callback (e.g. for waveform visualisation)
        - Result available as WAV bytes or as a numpy float32 array
        - Recording duration tracking

    Captured blocks are kept in memory as raw float32 bytes until the next
    call to ``start()``, so ``get_audio_data()`` and ``get_duration()`` keep
    working after ``stop()``.

    Args:
        sample_rate: Sample rate handed to the input stream (default: 16000)
        channels: Number of channels (default: 1, mono)
        chunk_size: Block size handed to the input stream (default: 1024)
    """

    # Size in bytes of one captured float32 sample.
    _FLOAT32_BYTES = 4
    # Sample width in bytes of the WAV output (16-bit integer PCM).
    _PCM_SAMPLE_WIDTH = 2

    def __init__(
        self,
        sample_rate: int = DEFAULT_SAMPLE_RATE,
        channels: int = DEFAULT_CHANNELS,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
    ):
        self.sample_rate = sample_rate
        self.channels = channels
        self.chunk_size = chunk_size

        self._is_recording = False
        # One entry per captured block: raw float32 sample bytes.
        self._audio_frames: list[bytes] = []
        self._stream = None
        self._level_callback: Optional[Callable[[float], None]] = None

    @property
    def is_recording(self) -> bool:
        """Whether a recording is currently in progress."""
        return self._is_recording

    @property
    def is_available(self) -> bool:
        """Whether sounddevice could be imported."""
        return AUDIO_AVAILABLE

    def set_level_callback(
        self, callback: Optional[Callable[[float], None]]
    ) -> None:
        """
        Set the audio level callback (e.g. for waveform visualisation).

        The callback is invoked from the audio block callback, once per
        captured block, so it should return quickly.

        Args:
            callback: Function receiving the mean absolute amplitude of the
                block as a float, or None to remove the current callback.
        """
        self._level_callback = callback

    def start(self) -> bool:
        """
        Start recording from the microphone.

        Any previously captured audio is discarded. Calling this while a
        recording is already running is a no-op that returns True.

        Returns:
            True if recording is running, False if sounddevice is
            unavailable or the input stream could not be opened.
        """
        if not AUDIO_AVAILABLE:
            logger.error("sounddevice 미설치")
            return False

        if self._is_recording:
            return True

        try:
            self._audio_frames = []
            # Set before the stream starts so the very first block is kept.
            self._is_recording = True

            self._stream = sd.InputStream(
                samplerate=self.sample_rate,
                channels=self.channels,
                dtype='float32',
                blocksize=self.chunk_size,
                callback=self._audio_callback,
            )
            self._stream.start()

            logger.info("녹음 시작")
            return True

        except Exception as e:
            logger.error("녹음 시작 실패: %s", e)
            self._is_recording = False
            self._cleanup()
            return False

    def _audio_callback(self, indata, frames, time_info, status) -> None:
        """Store one captured block and report its level (internal)."""
        if status:
            logger.warning("오디오 상태: %s", status)

        if not self._is_recording:
            return

        # tobytes() already produces an independent copy of the block, so no
        # separate indata.copy() is needed before storing it.
        self._audio_frames.append(indata.tobytes())

        # Level for UI visualisation: mean absolute amplitude of the block.
        if self._level_callback:
            level = float(np.abs(indata).mean())
            self._level_callback(level)

    def stop(self) -> Optional[bytes]:
        """
        Stop recording and return the captured audio as WAV bytes.

        Returns:
            WAV file contents (16-bit PCM), or None if no recording was in
            progress, nothing was captured, or the WAV could not be built.
        """
        if not self._is_recording:
            return None

        self._is_recording = False
        # Shut the stream down first so no callback can still be appending
        # while the frames are joined into the WAV payload.
        self._cleanup()
        wav_bytes = self._build_wav()
        logger.info("녹음 중지. 프레임 수: %s", len(self._audio_frames))
        return wav_bytes

    def _build_wav(self) -> Optional[bytes]:
        """
        Assemble the captured blocks into an in-memory WAV file (internal).

        The stdlib ``wave`` module writes integer-PCM headers only, so the
        float32 samples are converted to 16-bit PCM. Writing the float bytes
        directly with a 4-byte sample width would produce a file that
        readers decode as 32-bit integer PCM, i.e. noise.

        Returns:
            WAV file contents, or None when there is no data or on failure.
        """
        if not self._audio_frames:
            return None

        try:
            samples = np.frombuffer(
                b"".join(self._audio_frames), dtype=np.float32
            )
            # Clip to the nominal float range before scaling so out-of-range
            # samples saturate instead of wrapping around in int16.
            pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

            wav_buffer = io.BytesIO()
            with wave.open(wav_buffer, "wb") as wf:
                wf.setnchannels(self.channels)
                wf.setsampwidth(self._PCM_SAMPLE_WIDTH)
                wf.setframerate(self.sample_rate)
                wf.writeframes(pcm.tobytes())
            return wav_buffer.getvalue()
        except Exception as e:
            logger.error("WAV 생성 실패: %s", e)
            return None

    def get_audio_data(self) -> Optional[np.ndarray]:
        """
        Return the captured audio as a numpy float32 array.

        The array is one-dimensional: it is the concatenation of the raw
        bytes of all captured blocks, in capture order.

        Returns:
            float32 numpy array, or None if nothing has been captured.
        """
        if not self._audio_frames:
            return None

        raw = b"".join(self._audio_frames)
        return np.frombuffer(raw, dtype=np.float32)

    def get_duration(self) -> float:
        """
        Return the length of the captured audio in seconds.

        The duration is derived from the number of bytes actually captured
        rather than from ``chunk_size``, so it stays correct when blocks do
        not all contain exactly ``chunk_size`` frames.

        Returns:
            Duration in seconds (0.0 if nothing has been captured).
        """
        if not self._audio_frames:
            return 0.0
        bytes_per_frame = self._FLOAT32_BYTES * self.channels
        total_bytes = sum(len(block) for block in self._audio_frames)
        return total_bytes / bytes_per_frame / self.sample_rate

    def _cleanup(self) -> None:
        """
        Stop and close the input stream, if any (internal).

        Best effort: failures are logged at debug level and never raised.
        Stopping and closing are attempted independently so that a failing
        ``stop()`` does not leave the stream open.
        """
        # getattr: __del__ may run on an instance whose __init__ never
        # reached the point where _stream is assigned.
        stream = getattr(self, "_stream", None)
        if not stream:
            return
        self._stream = None

        try:
            stream.stop()
        except Exception as e:
            logger.debug("오디오 스트림 정지 실패: %s", e)
        try:
            stream.close()
        except Exception as e:
            logger.debug("오디오 스트림 닫기 실패: %s", e)

    def __del__(self):
        try:
            self._cleanup()
        except Exception:
            # A finalizer must never raise; during interpreter shutdown even
            # module globals such as the logger may already be gone.
            pass