Add CLI scanner tool for audio library
Create scanner.py to scan directories and analyze audio files:
- Recursively finds all audio files (mp3, wav, flac, etc.)
- Extracts features with librosa
- Classifies with Essentia (genre, mood, instruments)
- Stores results in the database

Usage: python -m src.cli.scanner /path/to/music

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
1
backend/src/cli/__init__.py
Normal file
1
backend/src/cli/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""CLI tools for Audio Classifier."""
|
||||||
185
backend/src/cli/scanner.py
Normal file
185
backend/src/cli/scanner.py
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Audio library scanner CLI tool.
|
||||||
|
Scans a directory for audio files and adds them to the database.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
# Add parent directory to path for imports
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||||
|
|
||||||
|
from src.core.audio_processor import extract_all_features
|
||||||
|
from src.core.essentia_classifier import EssentiaClassifier
|
||||||
|
from src.models.database import SessionLocal, Track
|
||||||
|
from src.utils.logging import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
# Supported audio formats
|
||||||
|
# Entries are lowercase and include the leading dot, because they are
# compared against ``Path.suffix.lower()``.
AUDIO_EXTENSIONS = {
    '.aac',
    '.flac',
    '.m4a',
    '.mp3',
    '.ogg',
    '.wav',
    '.wma',
}
|
||||||
|
|
||||||
|
|
||||||
|
def find_audio_files(directory: str) -> List[Path]:
    """Find all audio files in a directory and its subdirectories.

    A file counts as audio when its suffix, compared case-insensitively,
    is a member of ``AUDIO_EXTENSIONS``.

    Args:
        directory: Root directory to scan.

    Returns:
        Sorted list of paths to the audio files found. The list is empty
        (and an error is logged) when ``directory`` does not exist or is
        not a directory.
    """
    directory_path = Path(directory)

    if not directory_path.exists():
        logger.error(f"Directory does not exist: {directory}")
        return []

    # os.walk() yields nothing for a regular file, which would otherwise be
    # reported as a successful scan that found zero files.
    if not directory_path.is_dir():
        logger.error(f"Not a directory: {directory}")
        return []

    logger.info(f"Scanning directory: {directory}")

    audio_files = [
        Path(root) / file
        for root, _dirs, files in os.walk(directory_path)
        for file in files
        if Path(file).suffix.lower() in AUDIO_EXTENSIONS
    ]
    # os.walk() order depends on the filesystem; sort so that repeated scans
    # process files in the same order.
    audio_files.sort()

    logger.info(f"Found {len(audio_files)} audio files")
    return audio_files
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_and_store(file_path: Path, classifier: EssentiaClassifier, db) -> bool:
    """Analyze one audio file and persist the result as a ``Track`` row.

    Files whose path is already present in the ``Track`` table are skipped
    and reported as successful. Any exception raised during analysis or
    storage is logged, the session is rolled back, and ``False`` is returned.

    Args:
        file_path: Path to the audio file.
        classifier: Essentia classifier instance used for genre, mood and
            instrument predictions.
        db: Database session.

    Returns:
        True if the file was stored or was already stored, False otherwise.
    """
    # Columns copied verbatim from the extracted feature mapping.
    feature_columns = (
        'duration_seconds',
        'tempo_bpm',
        'key',
        'time_signature',
        'energy',
        'danceability',
        'valence',
        'loudness_lufs',
        'spectral_centroid',
        'zero_crossing_rate',
        'spectral_rolloff',
        'spectral_bandwidth',
    )

    try:
        logger.info(f"Processing: {file_path}")
        path_str = str(file_path)

        # Skip files that already have a row for this exact path.
        already_stored = db.query(Track).filter(Track.file_path == path_str).first()
        if already_stored:
            logger.info(f"Already in database, skipping: {file_path}")
            return True

        # Basic features first, then the three classifier passes.
        features = extract_all_features(path_str)
        genre_result = classifier.predict_genre(path_str)
        mood_result = classifier.predict_mood(path_str)
        detected = classifier.predict_instruments(path_str)

        # Assemble the column values in one mapping before building the row.
        record = {
            'file_path': path_str,
            'filename': file_path.name,
        }
        for column in feature_columns:
            record[column] = features[column]
        record['genre_primary'] = genre_result['primary']
        record['genre_secondary'] = genre_result['secondary']
        record['genre_confidence'] = genre_result['confidence']
        record['mood_primary'] = mood_result['primary']
        record['mood_secondary'] = mood_result['secondary']
        record['mood_arousal'] = mood_result['arousal']
        record['mood_valence'] = mood_result['valence']
        # Only the first five instrument entries are kept.
        record['instruments'] = [entry['name'] for entry in detected[:5]]

        db.add(Track(**record))
        db.commit()

        logger.info(f"✓ Added to database: {file_path.name}")
        logger.info(f" Genre: {genre_result['primary']}, Mood: {mood_result['primary']}, "
                    f"Tempo: {features['tempo_bpm']:.1f} BPM")
    except Exception as e:
        logger.error(f"Failed to process {file_path}: {e}")
        db.rollback()
        return False

    return True
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv=None):
    """Scan a directory for audio files and add them to the database.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When ``None``, argparse reads ``sys.argv[1:]``.

    Returns:
        Exit status as an int: 0 when every file was handled without error,
        1 when no audio files were found or at least one file failed.
    """
    parser = argparse.ArgumentParser(
        description='Scan audio library and add tracks to database'
    )
    parser.add_argument(
        'directory',
        help='Directory to scan for audio files'
    )
    parser.add_argument(
        '--workers',
        type=int,
        default=1,
        help='Number of parallel workers (default: 1)'
    )

    args = parser.parse_args(argv)

    # The option is accepted for forward compatibility, but files are
    # processed one at a time below; say so instead of ignoring it silently.
    if args.workers != 1:
        logger.warning(
            f"--workers={args.workers} requested, but parallel processing is "
            "not implemented; processing files sequentially"
        )

    # Find audio files
    audio_files = find_audio_files(args.directory)

    if not audio_files:
        logger.warning("No audio files found!")
        return 1

    total = len(audio_files)

    # Initialize classifier
    logger.info("Initializing Essentia classifier...")
    classifier = EssentiaClassifier()

    # Process files, sharing one session and closing it even if the loop
    # is interrupted.
    db = SessionLocal()
    success_count = 0
    error_count = 0

    try:
        for i, file_path in enumerate(audio_files, 1):
            logger.info(f"[{i}/{total}] Processing...")

            if analyze_and_store(file_path, classifier, db):
                success_count += 1
            else:
                error_count += 1
    finally:
        db.close()

    # Summary
    logger.info("")
    logger.info("=" * 60)
    logger.info("Scan complete!")
    logger.info(f" Total files: {total}")
    logger.info(f" Successfully processed: {success_count}")
    logger.info(f" Errors: {error_count}")
    logger.info("=" * 60)

    return 1 if error_count else 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Pass main()'s return value to the shell as the exit status;
    # sys.exit(None) exits with status 0.
    sys.exit(main())
|
||||||
Reference in New Issue
Block a user