# Audio-Classifier/backend/src/api/routes/library.py

"""Library management endpoints."""
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from sqlalchemy.orm import Session
from pathlib import Path
from typing import Optional
import os

from ...models.database import get_db
from ...models.schema import AudioTrack
from ...core.audio_processor import extract_all_features
from ...core.essentia_classifier import EssentiaClassifier
from ...core.transcoder import AudioTranscoder
from ...core.waveform_generator import save_waveform_to_file
from ...utils.logging import get_logger
from ...utils.config import settings

router = APIRouter()
logger = get_logger(__name__)

# File suffixes (dot included, lower-case) that the scanner accepts as audio;
# candidate suffixes are lower-cased before being compared against this set.
AUDIO_EXTENSIONS = {
    '.mp3',
    '.wav',
    '.flac',
    '.m4a',
    '.aac',
    '.ogg',
    '.wma',
}

# Module-wide scan state: mutated by the background scan task and returned
# as-is by the status endpoint.
scan_status = dict(
    is_scanning=False,
    progress=0,
    total_files=0,
    processed=0,
    errors=0,
    current_file=None,
)


def find_audio_files(directory: str) -> list[Path]:
    """Recursively collect the audio files found beneath ``directory``.

    Args:
        directory: Root folder to walk.

    Returns:
        Paths whose lower-cased suffix is in ``AUDIO_EXTENSIONS``, in the
        order ``os.walk`` yields them. An empty list (after logging an
        error) when ``directory`` does not exist.
    """
    base = Path(directory)
    if not base.exists():
        logger.error(f"Directory does not exist: {directory}")
        return []

    skipped = ('transcoded', 'waveforms', '.transcoded', '.waveforms')
    found = []
    for current, subdirs, names in os.walk(base):
        # Slice-assign in place so os.walk never descends into the
        # transcoded/waveform folders.
        subdirs[:] = [name for name in subdirs if name not in skipped]

        candidates = (Path(current) / name for name in names)
        found.extend(
            candidate
            for candidate in candidates
            if candidate.suffix.lower() in AUDIO_EXTENSIONS
        )

    return found


def _transcode_stream(transcoder, file_path: Path):
    """Transcode ``file_path`` to a 128 kbps MP3 and return the transcoder's result."""
    return transcoder.transcode_to_mp3(
        str(file_path),
        bitrate="128k",
        overwrite=False
    )


def _write_waveform(file_path: Path) -> Optional[str]:
    """Pre-compute the waveform JSON in a ``waveforms`` folder beside ``file_path``.

    Returns:
        The waveform path as a string when ``save_waveform_to_file`` reports
        success (truthy result), otherwise ``None``.
    """
    waveform_dir = file_path.parent / "waveforms"
    waveform_dir.mkdir(parents=True, exist_ok=True)
    # NOTE(review): the name is derived from the stem only, so "song.mp3" and
    # "song.flac" in the same folder map to the same waveform file - confirm
    # whether that is intended before changing the naming scheme.
    waveform_path = waveform_dir / f"{file_path.stem}.waveform.json"

    if save_waveform_to_file(str(file_path), str(waveform_path), num_peaks=800):
        return str(waveform_path)
    return None


def _refresh_existing_track(existing, file_path: Path, transcoder, db: Session) -> None:
    """Regenerate the stream and/or waveform of a track that is already stored."""
    attempted = False
    changed = False

    if not existing.stream_filepath or not Path(existing.stream_filepath).exists():
        logger.info(f"  → Needs transcoding: {file_path.name}")
        attempted = True
        stream_path = _transcode_stream(transcoder, file_path)
        if stream_path:
            existing.stream_filepath = stream_path
            changed = True

    if not existing.waveform_filepath or not Path(existing.waveform_filepath).exists():
        logger.info(f"  → Needs waveform: {file_path.name}")
        attempted = True
        waveform_filepath = _write_waveform(file_path)
        if waveform_filepath:
            existing.waveform_filepath = waveform_filepath
            changed = True

    if changed:
        db.commit()
        logger.info(f"✓ Updated: {file_path.name}")
    elif attempted:
        # Regeneration was needed but produced nothing: do not report an
        # update that never happened.
        logger.warning(f"Could not regenerate missing assets: {file_path.name}")
    else:
        logger.info(f"Already complete, skipping: {file_path.name}")


def _index_new_track(file_path: Path, classifier, transcoder, db: Session) -> None:
    """Analyse a file that is not in the database yet and store it as an AudioTrack."""
    features = extract_all_features(str(file_path))

    genre_result = classifier.predict_genre(str(file_path))
    mood_result = classifier.predict_mood(str(file_path))
    instruments = classifier.predict_instruments(str(file_path))

    logger.info("  → Transcoding to MP3 128kbps...")
    stream_path = _transcode_stream(transcoder, file_path)

    logger.info("  → Generating waveform...")
    waveform_filepath = _write_waveform(file_path)

    track = AudioTrack(
        filepath=str(file_path),
        stream_filepath=stream_path,
        waveform_filepath=waveform_filepath,
        filename=file_path.name,
        duration_seconds=features['duration_seconds'],
        tempo_bpm=features['tempo_bpm'],
        key=features['key'],
        time_signature=features['time_signature'],
        energy=features['energy'],
        danceability=features['danceability'],
        valence=features['valence'],
        loudness_lufs=features['loudness_lufs'],
        spectral_centroid=features['spectral_centroid'],
        zero_crossing_rate=features['zero_crossing_rate'],
        genre_primary=genre_result['primary'],
        genre_secondary=genre_result['secondary'],
        genre_confidence=genre_result['confidence'],
        mood_primary=mood_result['primary'],
        mood_secondary=mood_result['secondary'],
        mood_arousal=mood_result['arousal'],
        mood_valence=mood_result['valence'],
        # Keep only the first five predicted instruments.
        instruments=[instrument['name'] for instrument in instruments[:5]],
    )

    db.add(track)
    db.commit()
    logger.info(f"✓ Added: {file_path.name}")


def scan_library_task(directory: str, db: Session):
    """Background task that scans ``directory`` and indexes its audio files.

    Files already present in the database only get their missing stream or
    waveform regenerated; new files are analysed, classified, transcoded and
    stored. Progress is published through the module-level ``scan_status``
    dictionary:

    * ``total_files`` - number of audio files discovered.
    * ``processed`` - files handled without raising (added, updated or skipped).
    * ``errors`` - files whose processing raised, plus one if the scan itself
      failed; set to 1 when FFmpeg is unavailable.
    * ``progress`` - percentage of files that have *finished* processing.
    * ``current_file`` - file being worked on, ``None`` when idle.

    Args:
        directory: Root directory to scan.
        db: SQLAlchemy session used for all queries and commits.

    NOTE(review): callers pass the request-scoped session from ``get_db``. If
    that dependency closes the session once the request ends, this task would
    run against a closed session - confirm, and consider opening a dedicated
    session inside the task.
    """
    try:
        scan_status["is_scanning"] = True
        scan_status["progress"] = 0
        # Clear the previous scan's total so pollers do not see a stale count
        # while files are still being discovered.
        scan_status["total_files"] = 0
        scan_status["processed"] = 0
        scan_status["errors"] = 0
        scan_status["current_file"] = None

        # Find audio files
        logger.info(f"Scanning directory: {directory}")
        audio_files = find_audio_files(directory)
        total = len(audio_files)
        scan_status["total_files"] = total

        if not audio_files:
            logger.warning("No audio files found!")
            return  # the finally block clears is_scanning

        # Initialize classifier and transcoder
        logger.info("Initializing Essentia classifier...")
        classifier = EssentiaClassifier()

        logger.info("Initializing audio transcoder...")
        transcoder = AudioTranscoder()

        if not transcoder.check_ffmpeg_available():
            logger.error("FFmpeg is required for transcoding.")
            scan_status["errors"] = 1
            return

        # Process each file
        for index, file_path in enumerate(audio_files, 1):
            scan_status["current_file"] = str(file_path)

            try:
                logger.info(f"[{index}/{total}] Processing: {file_path.name}")

                # Check if already in database
                existing = db.query(AudioTrack).filter(
                    AudioTrack.filepath == str(file_path)
                ).first()

                if existing:
                    _refresh_existing_track(existing, file_path, transcoder, db)
                else:
                    _index_new_track(file_path, classifier, transcoder, db)

                scan_status["processed"] += 1

            except Exception as e:
                # One bad file must not abort the whole scan.
                logger.error(f"Failed to process {file_path}: {e}")
                scan_status["errors"] += 1
                db.rollback()

            # Updated after the file is done, so 100% is only reported once
            # the last file has actually been handled.
            scan_status["progress"] = int((index / total) * 100)

        # Scan complete
        logger.info("=" * 60)
        logger.info("Scan complete!")
        logger.info(f"  Total files: {total}")
        logger.info(f"  Processed: {scan_status['processed']}")
        logger.info(f"  Errors: {scan_status['errors']}")
        logger.info("=" * 60)

    except Exception as e:
        logger.error(f"Scan failed: {e}")
        scan_status["errors"] += 1

    finally:
        scan_status["is_scanning"] = False
        scan_status["current_file"] = None

@router.post("/scan")
async def scan_library(
    background_tasks: BackgroundTasks,
    directory: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """Trigger a library scan that runs as a background task.

    Args:
        background_tasks: FastAPI background tasks
        directory: Directory to scan; falls back to "/audio" when omitted
            or empty
        db: Database session handed to the background task

    Returns:
        A dict with a confirmation message, the directory being scanned and
        the current scan status

    Raises:
        HTTPException: 400 if a scan is already in progress or the directory
            does not exist / is not a directory
    """
    if scan_status["is_scanning"]:
        raise HTTPException(
            status_code=400,
            detail="Scan already in progress"
        )

    # NOTE(review): `settings` is imported by this module but the default is
    # hard-coded here - presumably it should come from configuration; confirm
    # the setting name before wiring it in.
    scan_dir = directory if directory else "/audio"

    # is_dir() rather than exists(): a path to a regular file is not scannable.
    if not Path(scan_dir).is_dir():
        raise HTTPException(
            status_code=400,
            detail=f"Directory does not exist: {scan_dir}"
        )

    # Claim the scan slot before the task is queued. The task only starts
    # after the response is sent, so without this a second request arriving
    # in between would pass the guard above and start a concurrent scan.
    # The task clears the flag again in its finally block.
    scan_status.update({
        "is_scanning": True,
        "progress": 0,
        "total_files": 0,
        "processed": 0,
        "errors": 0,
        "current_file": None,
    })

    # Start scan in background
    background_tasks.add_task(scan_library_task, scan_dir, db)

    return {
        "message": "Library scan started",
        "directory": scan_dir,
        "status": scan_status
    }


@router.get("/scan/status")
async def get_scan_status():
    """Report the state of the library scan.

    Returns:
        The module-level ``scan_status`` dictionary as it currently stands
    """
    current_status = scan_status
    return current_status