Files
Audio-Classifier/backend/src/api/routes/library.py
Benoit b74c6b0b40
All checks were successful
Build and Push Docker Images / Build Frontend Image (push) Successful in 57s
Fix scan infini: exclure dossiers transcoded et waveforms
Problème: Le scanner scannait TOUS les dossiers, y compris les dossiers
générés (transcoded/ et waveforms/), créant:
1. Boucle infinie: scan original → crée transcoded → re-scan transcoded
2. Segfaults: tentative de transcoder des fichiers déjà transcodés
3. Doublons en base de données

Solution:
- library.py: Exclut transcoded, waveforms, .transcoded, .waveforms
- scanner.py: Même exclusion dans le CLI

Technique: Modifie dirs[:] dans os.walk() pour skip ces dossiers.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-26 00:08:13 +01:00

276 lines
9.5 KiB
Python

"""Library management endpoints."""
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from sqlalchemy.orm import Session
from pathlib import Path
from typing import Optional
import os
from ...models.database import get_db
from ...models.schema import AudioTrack
from ...core.audio_processor import extract_all_features
from ...core.essentia_classifier import EssentiaClassifier
from ...core.transcoder import AudioTranscoder
from ...core.waveform_generator import save_waveform_to_file
from ...utils.logging import get_logger
from ...utils.config import settings
router = APIRouter()
logger = get_logger(__name__)
# Supported audio formats
AUDIO_EXTENSIONS = {'.mp3', '.wav', '.flac', '.m4a', '.aac', '.ogg', '.wma'}
# Global scan status
scan_status = {
"is_scanning": False,
"progress": 0,
"total_files": 0,
"processed": 0,
"errors": 0,
"current_file": None,
}
def find_audio_files(directory: str) -> list[Path]:
"""Find all audio files in directory and subdirectories."""
audio_files = []
directory_path = Path(directory)
if not directory_path.exists():
logger.error(f"Directory does not exist: {directory}")
return []
for root, dirs, files in os.walk(directory_path):
# Skip transcoded and waveforms directories
dirs[:] = [d for d in dirs if d not in ['transcoded', 'waveforms', '.transcoded', '.waveforms']]
for file in files:
file_path = Path(root) / file
if file_path.suffix.lower() in AUDIO_EXTENSIONS:
audio_files.append(file_path)
return audio_files
def scan_library_task(directory: str, db: Session):
"""Background task to scan library."""
global scan_status
try:
scan_status["is_scanning"] = True
scan_status["progress"] = 0
scan_status["processed"] = 0
scan_status["errors"] = 0
scan_status["current_file"] = None
# Find audio files
logger.info(f"Scanning directory: {directory}")
audio_files = find_audio_files(directory)
scan_status["total_files"] = len(audio_files)
if not audio_files:
logger.warning("No audio files found!")
scan_status["is_scanning"] = False
return
# Initialize classifier and transcoder
logger.info("Initializing Essentia classifier...")
classifier = EssentiaClassifier()
logger.info("Initializing audio transcoder...")
transcoder = AudioTranscoder()
if not transcoder.check_ffmpeg_available():
logger.error("FFmpeg is required for transcoding.")
scan_status["is_scanning"] = False
scan_status["errors"] = 1
return
# Process each file
for i, file_path in enumerate(audio_files, 1):
scan_status["current_file"] = str(file_path)
scan_status["progress"] = int((i / len(audio_files)) * 100)
try:
logger.info(f"[{i}/{len(audio_files)}] Processing: {file_path.name}")
# Check if already in database
existing = db.query(AudioTrack).filter(
AudioTrack.filepath == str(file_path)
).first()
if existing:
# Check if needs transcoding/waveform
needs_update = False
if not existing.stream_filepath or not Path(existing.stream_filepath).exists():
logger.info(f" → Needs transcoding: {file_path.name}")
needs_update = True
# Transcode to MP3 128kbps
stream_path = transcoder.transcode_to_mp3(
str(file_path),
bitrate="128k",
overwrite=False
)
if stream_path:
existing.stream_filepath = stream_path
if not existing.waveform_filepath or not Path(existing.waveform_filepath).exists():
logger.info(f" → Needs waveform: {file_path.name}")
needs_update = True
# Pre-compute waveform
waveform_dir = file_path.parent / "waveforms"
waveform_dir.mkdir(parents=True, exist_ok=True)
waveform_path = waveform_dir / f"{file_path.stem}.waveform.json"
if save_waveform_to_file(str(file_path), str(waveform_path), num_peaks=800):
existing.waveform_filepath = str(waveform_path)
if needs_update:
db.commit()
logger.info(f"✓ Updated: {file_path.name}")
else:
logger.info(f"Already complete, skipping: {file_path.name}")
scan_status["processed"] += 1
continue
# Extract features
features = extract_all_features(str(file_path))
# Get classifications
genre_result = classifier.predict_genre(str(file_path))
mood_result = classifier.predict_mood(str(file_path))
instruments = classifier.predict_instruments(str(file_path))
# Transcode to MP3 128kbps
logger.info(" → Transcoding to MP3 128kbps...")
stream_path = transcoder.transcode_to_mp3(
str(file_path),
bitrate="128k",
overwrite=False
)
# Pre-compute waveform
logger.info(" → Generating waveform...")
waveform_dir = file_path.parent / "waveforms"
waveform_dir.mkdir(parents=True, exist_ok=True)
waveform_path = waveform_dir / f"{file_path.stem}.waveform.json"
waveform_success = save_waveform_to_file(
str(file_path),
str(waveform_path),
num_peaks=800
)
# Create track record
track = AudioTrack(
filepath=str(file_path),
stream_filepath=stream_path,
waveform_filepath=str(waveform_path) if waveform_success else None,
filename=file_path.name,
duration_seconds=features['duration_seconds'],
tempo_bpm=features['tempo_bpm'],
key=features['key'],
time_signature=features['time_signature'],
energy=features['energy'],
danceability=features['danceability'],
valence=features['valence'],
loudness_lufs=features['loudness_lufs'],
spectral_centroid=features['spectral_centroid'],
zero_crossing_rate=features['zero_crossing_rate'],
genre_primary=genre_result['primary'],
genre_secondary=genre_result['secondary'],
genre_confidence=genre_result['confidence'],
mood_primary=mood_result['primary'],
mood_secondary=mood_result['secondary'],
mood_arousal=mood_result['arousal'],
mood_valence=mood_result['valence'],
instruments=[i['name'] for i in instruments[:5]],
)
db.add(track)
db.commit()
scan_status["processed"] += 1
logger.info(f"✓ Added: {file_path.name}")
except Exception as e:
logger.error(f"Failed to process {file_path}: {e}")
scan_status["errors"] += 1
db.rollback()
# Scan complete
logger.info("=" * 60)
logger.info(f"Scan complete!")
logger.info(f" Total files: {len(audio_files)}")
logger.info(f" Processed: {scan_status['processed']}")
logger.info(f" Errors: {scan_status['errors']}")
logger.info("=" * 60)
except Exception as e:
logger.error(f"Scan failed: {e}")
scan_status["errors"] += 1
finally:
scan_status["is_scanning"] = False
scan_status["current_file"] = None
@router.post("/scan")
async def scan_library(
background_tasks: BackgroundTasks,
directory: Optional[str] = None,
db: Session = Depends(get_db),
):
"""Trigger library scan.
Args:
background_tasks: FastAPI background tasks
directory: Directory to scan (defaults to MUSIC_DIR from settings)
db: Database session
Returns:
Scan status
Raises:
HTTPException: 400 if scan already in progress or directory invalid
"""
global scan_status
if scan_status["is_scanning"]:
raise HTTPException(
status_code=400,
detail="Scan already in progress"
)
# Use default music directory if not provided
scan_dir = directory if directory else "/audio"
if not Path(scan_dir).exists():
raise HTTPException(
status_code=400,
detail=f"Directory does not exist: {scan_dir}"
)
# Start scan in background
background_tasks.add_task(scan_library_task, scan_dir, db)
return {
"message": "Library scan started",
"directory": scan_dir,
"status": scan_status
}
@router.get("/scan/status")
async def get_scan_status():
"""Get current scan status.
Returns:
Current scan status
"""
return scan_status