From dec30019e2246f7a228828e733f605ba3a6f8f6c Mon Sep 17 00:00:00 2001 From: Benoit Date: Mon, 22 Dec 2025 12:59:20 +0100 Subject: [PATCH] WIP essentia --- .claude/settings.local.json | 15 +++++ README.md | 4 ++ backend/Dockerfile | 4 +- backend/src/core/essentia_classifier.py | 78 +++++++++++++++++++------ docker-compose.yml | 9 ++- frontend/.dockerignore | 1 - frontend/.env.local | 1 + frontend/Dockerfile | 4 ++ scripts/download-essentia-models.sh | 19 ++++-- 9 files changed, 106 insertions(+), 29 deletions(-) create mode 100644 .claude/settings.local.json create mode 100644 frontend/.env.local diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..b04e254 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,15 @@ +{ + "permissions": { + "allow": [ + "Bash(node --version:*)", + "Bash(docker --version:*)", + "Bash(docker-compose:*)", + "Bash(test:*)", + "Bash(cp:*)", + "Bash(bash scripts/download-essentia-models.sh:*)", + "Bash(curl:*)", + "Bash(docker logs:*)", + "Bash(docker exec:*)" + ] + } +} diff --git a/README.md b/README.md index 07c6a70..e31c20b 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,10 @@ curl -X POST http://localhost:8001/api/analyze/folder \ -H "Content-Type: application/json" \ -d '{"path": "/audio/music", "recursive": true}' ``` +#### Sous Windows 10 +````bash +curl.exe -X POST http://localhost:8001/api/analyze/folder -H "Content-Type: application/json" -d '{\"path\": \"/audio/\", \"recursive\": true}' +```` ### Rechercher des pistes diff --git a/backend/Dockerfile b/backend/Dockerfile index 4e61e97..7b56508 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -39,8 +39,8 @@ COPY requirements.txt . RUN pip install --no-cache-dir numpy==1.24.3 RUN pip install --no-cache-dir scipy==1.11.4 -# Install Essentia - Python 3.9 with ARM64 support -RUN pip install --no-cache-dir essentia +# Install Essentia-TensorFlow - Python 3.9 AMD64 support +RUN pip install --no-cache-dir essentia-tensorflow RUN pip install --no-cache-dir -r requirements.txt diff --git a/backend/src/core/essentia_classifier.py b/backend/src/core/essentia_classifier.py index 7b9347f..173b846 100644 --- a/backend/src/core/essentia_classifier.py +++ b/backend/src/core/essentia_classifier.py @@ -14,7 +14,8 @@ try: from essentia.standard import ( MonoLoader, TensorflowPredictEffnetDiscogs, - TensorflowPredict2D + TensorflowPredict2D, + TensorflowPredictMusiCNN ) ESSENTIA_AVAILABLE = True except ImportError: @@ -55,7 +56,17 @@ class EssentiaClassifier: logger.warning(f"Models path {self.models_path} does not exist") return - # Model file names + # Check for embedding model first + embedding_file = "discogs-effnet-bs64-1.pb" + embedding_path = self.models_path / embedding_file + if embedding_path.exists(): + logger.info(f"Loading embedding model from {embedding_path}") + self.models["embedding"] = str(embedding_path) + else: + logger.warning(f"Embedding model not found: {embedding_path}") + return # Cannot proceed without embeddings + + # Model file names for classification heads model_files = { "genre": "mtg_jamendo_genre-discogs-effnet-1.pb", "mood": "mtg_jamendo_moodtheme-discogs-effnet-1.pb", @@ -135,15 +146,26 @@ class EssentiaClassifier: return self._fallback_genre() try: - # Load audio + # Step 1: Extract embeddings using discogs-effnet audio = MonoLoader(filename=audio_path, sampleRate=16000, resampleQuality=4)() - # Predict - model = TensorflowPredictEffnetDiscogs( - graphFilename=self.models["genre"], + embedding_model = TensorflowPredictEffnetDiscogs( + graphFilename=self.models["embedding"], output="PartitionedCall:1" ) - predictions = model(audio) + embeddings = embedding_model(audio) + + # Average embeddings over time + embeddings_mean = np.mean(embeddings, axis=0) + + # Step 2: Feed embeddings to classification head + classifier = TensorflowPredict2D( + graphFilename=self.models["genre"], + input="model/Placeholder", + output="model/Sigmoid" + ) + predictions = classifier(embeddings_mean.reshape(1, -1)) + predictions = predictions[0] # Remove batch dimension # Get top predictions top_indices = np.argsort(predictions)[::-1][:5] @@ -172,19 +194,28 @@ class EssentiaClassifier: Returns: Dictionary with mood predictions """ - if not ESSENTIA_AVAILABLE or "mood" not in self.models: + if not ESSENTIA_AVAILABLE or "mood" not in self.models or "embedding" not in self.models: return self._fallback_mood() try: - # Load audio + # Step 1: Extract embeddings using discogs-effnet audio = MonoLoader(filename=audio_path, sampleRate=16000, resampleQuality=4)() - # Predict - model = TensorflowPredictEffnetDiscogs( - graphFilename=self.models["mood"], + embedding_model = TensorflowPredictEffnetDiscogs( + graphFilename=self.models["embedding"], output="PartitionedCall:1" ) - predictions = model(audio) + embeddings = embedding_model(audio) + embeddings_mean = np.mean(embeddings, axis=0) + + # Step 2: Feed embeddings to classification head + classifier = TensorflowPredict2D( + graphFilename=self.models["mood"], + input="model/Placeholder", + output="model/Sigmoid" + ) + predictions = classifier(embeddings_mean.reshape(1, -1)) + predictions = predictions[0] # Get top predictions top_indices = np.argsort(predictions)[::-1][:5] @@ -216,19 +247,28 @@ class EssentiaClassifier: Returns: List of instruments with confidence scores """ - if not ESSENTIA_AVAILABLE or "instrument" not in self.models: + if not ESSENTIA_AVAILABLE or "instrument" not in self.models or "embedding" not in self.models: return self._fallback_instruments() try: - # Load audio + # Step 1: Extract embeddings using discogs-effnet audio = MonoLoader(filename=audio_path, sampleRate=16000, resampleQuality=4)() - # Predict - model = TensorflowPredictEffnetDiscogs( - graphFilename=self.models["instrument"], + embedding_model = TensorflowPredictEffnetDiscogs( + graphFilename=self.models["embedding"], output="PartitionedCall:1" ) - predictions = model(audio) + embeddings = embedding_model(audio) + embeddings_mean = np.mean(embeddings, axis=0) + + # Step 2: Feed embeddings to classification head + classifier = TensorflowPredict2D( + graphFilename=self.models["instrument"], + input="model/Placeholder", + output="model/Sigmoid" + ) + predictions = classifier(embeddings_mean.reshape(1, -1)) + predictions = predictions[0] # Get instruments above threshold threshold = 0.1 diff --git a/docker-compose.yml b/docker-compose.yml index 45d274a..3643a6f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,10 +40,15 @@ services: restart: unless-stopped frontend: - build: ./frontend + build: + context: ./frontend + args: + NEXT_PUBLIC_API_URL: http://localhost:8001 container_name: audio_classifier_ui environment: - NEXT_PUBLIC_API_URL: http://backend:8000 + # Use localhost:8001 because the browser (client-side) needs to access the API + # The backend is mapped to port 8001 on the host machine + NEXT_PUBLIC_API_URL: http://localhost:8001 ports: - "3000:3000" depends_on: diff --git a/frontend/.dockerignore b/frontend/.dockerignore index 1ea9800..12e9c9f 100644 --- a/frontend/.dockerignore +++ b/frontend/.dockerignore @@ -1,7 +1,6 @@ node_modules .next .git -.env.local npm-debug.log* yarn-debug.log* yarn-error.log* diff --git a/frontend/.env.local b/frontend/.env.local new file mode 100644 index 0000000..9be0d67 --- /dev/null +++ b/frontend/.env.local @@ -0,0 +1 @@ +NEXT_PUBLIC_API_URL=http://localhost:8001 diff --git a/frontend/Dockerfile b/frontend/Dockerfile index ecf7a27..0926f20 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -12,6 +12,10 @@ RUN npm ci # Copy application code COPY . . +# Build argument for API URL +ARG NEXT_PUBLIC_API_URL=http://localhost:8001 +ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL} + # Build the application RUN npm run build diff --git a/scripts/download-essentia-models.sh b/scripts/download-essentia-models.sh index 1eddd37..23a7e3b 100755 --- a/scripts/download-essentia-models.sh +++ b/scripts/download-essentia-models.sh @@ -6,7 +6,8 @@ set -e # Exit on error MODELS_DIR="backend/models" -BASE_URL="https://essentia.upf.edu/models/classification-heads" +CLASS_HEADS_URL="https://essentia.upf.edu/models/classification-heads" +EMBEDDINGS_URL="https://essentia.upf.edu/models/feature-extractors/discogs-effnet" echo "📦 Downloading Essentia models..." echo "Models directory: $MODELS_DIR" @@ -37,15 +38,23 @@ download_model() { fi } -# Download each model +# Download embedding model first (required for all classification heads) +echo "" +echo "Downloading embedding model..." +download_model "discogs-effnet-bs64-1.pb" \ + "$EMBEDDINGS_URL/discogs-effnet-bs64-1.pb" + +# Download classification heads +echo "" +echo "Downloading classification heads..." download_model "mtg_jamendo_genre-discogs-effnet-1.pb" \ - "$BASE_URL/mtg_jamendo_genre/mtg_jamendo_genre-discogs-effnet-1.pb" + "$CLASS_HEADS_URL/mtg_jamendo_genre/mtg_jamendo_genre-discogs-effnet-1.pb" download_model "mtg_jamendo_moodtheme-discogs-effnet-1.pb" \ - "$BASE_URL/mtg_jamendo_moodtheme/mtg_jamendo_moodtheme-discogs-effnet-1.pb" + "$CLASS_HEADS_URL/mtg_jamendo_moodtheme/mtg_jamendo_moodtheme-discogs-effnet-1.pb" download_model "mtg_jamendo_instrument-discogs-effnet-1.pb" \ - "$BASE_URL/mtg_jamendo_instrument/mtg_jamendo_instrument-discogs-effnet-1.pb" + "$CLASS_HEADS_URL/mtg_jamendo_instrument/mtg_jamendo_instrument-discogs-effnet-1.pb" echo "" echo "✅ All models downloaded successfully!"