From 75ff92f62f20132e29569d640d6780e46f0f343d Mon Sep 17 00:00:00 2001
From: Benoit
Date: Tue, 2 Dec 2025 23:52:47 +0100
Subject: [PATCH] Add CLI scanner tool for audio library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create scanner.py to scan directories and analyze audio files
- Recursively finds all audio files (mp3, wav, flac, etc.)
- Extracts features with librosa
- Classifies with Essentia (genre, mood, instruments)
- Stores results in database

Usage: python -m src.cli.scanner /path/to/music

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
Review note: scanner.py below was revised after the original commit
(absolute, sorted scan paths; warning for the unused --workers option;
non-zero exit status on failures). The blob id on its index line still
refers to the original content.

 backend/src/cli/__init__.py |   1 +
 backend/src/cli/scanner.py  | 214 ++++++++++++++++++++++++++++++++++++
 2 files changed, 215 insertions(+)
 create mode 100644 backend/src/cli/__init__.py
 create mode 100644 backend/src/cli/scanner.py

diff --git a/backend/src/cli/__init__.py b/backend/src/cli/__init__.py
new file mode 100644
index 0000000..60da191
--- /dev/null
+++ b/backend/src/cli/__init__.py
@@ -0,0 +1 @@
+"""CLI tools for Audio Classifier."""
diff --git a/backend/src/cli/scanner.py b/backend/src/cli/scanner.py
new file mode 100644
index 0000000..31cdd7c
--- /dev/null
+++ b/backend/src/cli/scanner.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+"""
+Audio library scanner CLI tool.
+Scans a directory for audio files and adds them to the database.
+"""
+
+import os
+import sys
+import argparse
+from pathlib import Path
+from typing import List
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from src.core.audio_processor import extract_all_features
+from src.core.essentia_classifier import EssentiaClassifier
+from src.models.database import SessionLocal, Track
+from src.utils.logging import get_logger
+
+logger = get_logger(__name__)
+
+# Supported audio formats
+AUDIO_EXTENSIONS = {'.mp3', '.wav', '.flac', '.m4a', '.aac', '.ogg', '.wma'}
+
+
+def find_audio_files(directory: str) -> List[Path]:
+    """Find all audio files in directory and subdirectories.
+
+    The root is resolved to an absolute path before walking, so the paths
+    returned (and later stored as Track.file_path) do not depend on the
+    current working directory. Results are sorted so that the scan order
+    is reproducible.
+
+    Args:
+        directory: Root directory to scan
+
+    Returns:
+        Sorted list of absolute paths to audio files; empty if the root
+        is missing or is not a directory
+    """
+    directory_path = Path(directory).resolve()
+
+    # is_dir() rather than exists(): os.walk() silently yields nothing
+    # when it is handed a regular file.
+    if not directory_path.is_dir():
+        logger.error(f"Not a directory: {directory}")
+        return []
+
+    logger.info(f"Scanning directory: {directory}")
+
+    audio_files = []
+    for root, _dirs, files in os.walk(directory_path):
+        for file in files:
+            file_path = Path(root) / file
+            if file_path.suffix.lower() in AUDIO_EXTENSIONS:
+                audio_files.append(file_path)
+    audio_files.sort()
+
+    logger.info(f"Found {len(audio_files)} audio files")
+    return audio_files
+
+
+def analyze_and_store(file_path: Path, classifier: EssentiaClassifier, db) -> bool:
+    """Analyze an audio file and store it in the database.
+
+    Files whose path is already present in the Track table are skipped
+    and reported as success.
+
+    Args:
+        file_path: Path to audio file
+        classifier: Essentia classifier instance
+        db: Database session
+
+    Returns:
+        True if the track was stored or already present, False if
+        processing failed (the session is rolled back in that case)
+    """
+    try:
+        logger.info(f"Processing: {file_path}")
+
+        # Check if already in database
+        existing = db.query(Track).filter(Track.file_path == str(file_path)).first()
+        if existing:
+            logger.info(f"Already in database, skipping: {file_path}")
+            return True
+
+        # Extract basic features with librosa
+        features = extract_all_features(str(file_path))
+
+        # Get genre classification
+        genre_result = classifier.predict_genre(str(file_path))
+
+        # Get mood classification
+        mood_result = classifier.predict_mood(str(file_path))
+
+        # Get instruments
+        instruments = classifier.predict_instruments(str(file_path))
+
+        # Create track record
+        track = Track(
+            file_path=str(file_path),
+            filename=file_path.name,
+            duration_seconds=features['duration_seconds'],
+            tempo_bpm=features['tempo_bpm'],
+            key=features['key'],
+            time_signature=features['time_signature'],
+            energy=features['energy'],
+            danceability=features['danceability'],
+            valence=features['valence'],
+            loudness_lufs=features['loudness_lufs'],
+            spectral_centroid=features['spectral_centroid'],
+            zero_crossing_rate=features['zero_crossing_rate'],
+            spectral_rolloff=features['spectral_rolloff'],
+            spectral_bandwidth=features['spectral_bandwidth'],
+            genre_primary=genre_result['primary'],
+            genre_secondary=genre_result['secondary'],
+            genre_confidence=genre_result['confidence'],
+            mood_primary=mood_result['primary'],
+            mood_secondary=mood_result['secondary'],
+            mood_arousal=mood_result['arousal'],
+            mood_valence=mood_result['valence'],
+            instruments=[i['name'] for i in instruments[:5]],  # Top 5
+        )
+
+        db.add(track)
+        db.commit()
+
+        logger.info(f"✓ Added to database: {file_path.name}")
+        logger.info(f" Genre: {genre_result['primary']}, Mood: {mood_result['primary']}, "
+                    f"Tempo: {features['tempo_bpm']:.1f} BPM")
+
+        return True
+
+    except Exception as e:
+        logger.error(f"Failed to process {file_path}: {e}")
+        db.rollback()
+        return False
+
+
+def main():
+    """Main scanner function.
+
+    Returns:
+        Process exit status: 0 if every file was stored or skipped (or
+        no audio files were found), 1 if at least one file failed
+    """
+    parser = argparse.ArgumentParser(
+        description='Scan audio library and add tracks to database'
+    )
+    parser.add_argument(
+        'directory',
+        help='Directory to scan for audio files'
+    )
+    parser.add_argument(
+        '--workers',
+        type=int,
+        default=1,
+        help='Number of parallel workers (default: 1)'
+    )
+
+    args = parser.parse_args()
+
+    if args.workers != 1:
+        # The option is accepted, but files are processed one at a time
+        # on a single database session; say so instead of ignoring it.
+        logger.warning(
+            f"--workers={args.workers} requested, but parallel processing "
+            "is not implemented; processing files sequentially"
+        )
+
+    # Find audio files
+    audio_files = find_audio_files(args.directory)
+
+    if not audio_files:
+        logger.warning("No audio files found!")
+        return 0
+
+    # Initialize classifier
+    logger.info("Initializing Essentia classifier...")
+    classifier = EssentiaClassifier()
+
+    # Process files
+    db = SessionLocal()
+    success_count = 0
+    error_count = 0
+
+    try:
+        for i, file_path in enumerate(audio_files, 1):
+            logger.info(f"[{i}/{len(audio_files)}] Processing...")
+
+            if analyze_and_store(file_path, classifier, db):
+                success_count += 1
+            else:
+                error_count += 1
+
+    finally:
+        db.close()
+
+    # Summary
+    logger.info("")
+    logger.info("=" * 60)
+    logger.info("Scan complete!")
+    logger.info(f" Total files: {len(audio_files)}")
+    logger.info(f" Successfully processed: {success_count}")
+    logger.info(f" Errors: {error_count}")
+    logger.info("=" * 60)
+
+    # Non-zero status lets shell scripts and schedulers detect failures.
+    return 1 if error_count else 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())