From dec30019e2246f7a228828e733f605ba3a6f8f6c Mon Sep 17 00:00:00 2001
From: Benoit <benoit.91800@hotmail.fr>
Date: Mon, 22 Dec 2025 12:59:20 +0100
Subject: [PATCH] WIP essentia: discogs-effnet embeddings + classification heads, frontend API URL

---
 .claude/settings.local.json             | 15 +++++
 README.md                               |  4 ++
 backend/Dockerfile                      |  4 +-
 backend/src/core/essentia_classifier.py | 78 +++++++++++++++++++------
 docker-compose.yml                      |  9 ++-
 frontend/.dockerignore                  |  1 -
 frontend/.env.local                     |  1 +
 frontend/Dockerfile                     |  4 ++
 scripts/download-essentia-models.sh     | 19 ++++--
 9 files changed, 106 insertions(+), 29 deletions(-)
 create mode 100644 .claude/settings.local.json
 create mode 100644 frontend/.env.local

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 0000000..b04e254
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,15 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(node --version:*)",
+      "Bash(docker --version:*)",
+      "Bash(docker-compose:*)",
+      "Bash(test:*)",
+      "Bash(cp:*)",
+      "Bash(bash scripts/download-essentia-models.sh:*)",
+      "Bash(curl:*)",
+      "Bash(docker logs:*)",
+      "Bash(docker exec:*)"
+    ]
+  }
+}
diff --git a/README.md b/README.md
index 07c6a70..e31c20b 100644
--- a/README.md
+++ b/README.md
@@ -95,6 +95,10 @@ curl -X POST http://localhost:8001/api/analyze/folder \
   -H "Content-Type: application/json" \
   -d '{"path": "/audio/music", "recursive": true}'
 ```
+#### Sous Windows 10
+````bash
+curl.exe -X POST http://localhost:8001/api/analyze/folder -H "Content-Type: application/json" -d '{\"path\": \"/audio/\", \"recursive\": true}'
+````
 
 ### Rechercher des pistes
 
diff --git a/backend/Dockerfile b/backend/Dockerfile
index 4e61e97..7b56508 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -39,8 +39,8 @@ COPY requirements.txt .
 RUN pip install --no-cache-dir numpy==1.24.3
 RUN pip install --no-cache-dir scipy==1.11.4
 
-# Install Essentia - Python 3.9 with ARM64 support
-RUN pip install --no-cache-dir essentia
+# Install essentia-tensorflow (Essentia build that provides the TensorflowPredict* algorithms) - Python 3.9, AMD64
+RUN pip install --no-cache-dir essentia-tensorflow
 
 RUN pip install --no-cache-dir -r requirements.txt
 
diff --git a/backend/src/core/essentia_classifier.py b/backend/src/core/essentia_classifier.py
index 7b9347f..173b846 100644
--- a/backend/src/core/essentia_classifier.py
+++ b/backend/src/core/essentia_classifier.py
@@ -14,7 +14,8 @@ try:
     from essentia.standard import (
         MonoLoader,
         TensorflowPredictEffnetDiscogs,
-        TensorflowPredict2D
+        TensorflowPredict2D,
+        TensorflowPredictMusiCNN
     )
     ESSENTIA_AVAILABLE = True
 except ImportError:
@@ -55,7 +56,17 @@ class EssentiaClassifier:
             logger.warning(f"Models path {self.models_path} does not exist")
             return
 
-        # Model file names
+        # Check for embedding model first
+        embedding_file = "discogs-effnet-bs64-1.pb"
+        embedding_path = self.models_path / embedding_file
+        if embedding_path.exists():
+            logger.info(f"Loading embedding model from {embedding_path}")
+            self.models["embedding"] = str(embedding_path)
+        else:
+            logger.warning(f"Embedding model not found: {embedding_path}")
+            return  # Cannot proceed without embeddings
+
+        # Model file names for classification heads
         model_files = {
             "genre": "mtg_jamendo_genre-discogs-effnet-1.pb",
             "mood": "mtg_jamendo_moodtheme-discogs-effnet-1.pb",
@@ -135,15 +146,26 @@ class EssentiaClassifier:
             return self._fallback_genre()
 
         try:
-            # Load audio
+            # Step 1: Extract embeddings using discogs-effnet
             audio = MonoLoader(filename=audio_path, sampleRate=16000, resampleQuality=4)()
 
-            # Predict
-            model = TensorflowPredictEffnetDiscogs(
-                graphFilename=self.models["genre"],
+            embedding_model = TensorflowPredictEffnetDiscogs(
+                graphFilename=self.models["embedding"],
                 output="PartitionedCall:1"
             )
-            predictions = model(audio)
+            embeddings = embedding_model(audio)
+
+            # Average embeddings over time
+            embeddings_mean = np.mean(embeddings, axis=0)
+
+            # Step 2: Feed embeddings to classification head
+            classifier = TensorflowPredict2D(
+                graphFilename=self.models["genre"],
+                input="model/Placeholder",
+                output="model/Sigmoid"
+            )
+            predictions = classifier(embeddings_mean.reshape(1, -1))
+            predictions = predictions[0]  # Remove batch dimension
 
             # Get top predictions
             top_indices = np.argsort(predictions)[::-1][:5]
@@ -172,19 +194,28 @@ class EssentiaClassifier:
         Returns:
             Dictionary with mood predictions
         """
-        if not ESSENTIA_AVAILABLE or "mood" not in self.models:
+        if not ESSENTIA_AVAILABLE or "mood" not in self.models or "embedding" not in self.models:
             return self._fallback_mood()
 
         try:
-            # Load audio
+            # Step 1: Extract embeddings using discogs-effnet
             audio = MonoLoader(filename=audio_path, sampleRate=16000, resampleQuality=4)()
 
-            # Predict
-            model = TensorflowPredictEffnetDiscogs(
-                graphFilename=self.models["mood"],
+            embedding_model = TensorflowPredictEffnetDiscogs(
+                graphFilename=self.models["embedding"],
                 output="PartitionedCall:1"
             )
-            predictions = model(audio)
+            embeddings = embedding_model(audio)
+            embeddings_mean = np.mean(embeddings, axis=0)
+
+            # Step 2: Feed embeddings to classification head
+            classifier = TensorflowPredict2D(
+                graphFilename=self.models["mood"],
+                input="model/Placeholder",
+                output="model/Sigmoid"
+            )
+            predictions = classifier(embeddings_mean.reshape(1, -1))
+            predictions = predictions[0]
 
             # Get top predictions
             top_indices = np.argsort(predictions)[::-1][:5]
@@ -216,19 +247,28 @@ class EssentiaClassifier:
         Returns:
             List of instruments with confidence scores
         """
-        if not ESSENTIA_AVAILABLE or "instrument" not in self.models:
+        if not ESSENTIA_AVAILABLE or "instrument" not in self.models or "embedding" not in self.models:
             return self._fallback_instruments()
 
         try:
-            # Load audio
+            # Step 1: Extract embeddings using discogs-effnet
             audio = MonoLoader(filename=audio_path, sampleRate=16000, resampleQuality=4)()
 
-            # Predict
-            model = TensorflowPredictEffnetDiscogs(
-                graphFilename=self.models["instrument"],
+            embedding_model = TensorflowPredictEffnetDiscogs(
+                graphFilename=self.models["embedding"],
                 output="PartitionedCall:1"
             )
-            predictions = model(audio)
+            embeddings = embedding_model(audio)
+            embeddings_mean = np.mean(embeddings, axis=0)
+
+            # Step 2: Feed embeddings to classification head
+            classifier = TensorflowPredict2D(
+                graphFilename=self.models["instrument"],
+                input="model/Placeholder",
+                output="model/Sigmoid"
+            )
+            predictions = classifier(embeddings_mean.reshape(1, -1))
+            predictions = predictions[0]
 
             # Get instruments above threshold
             threshold = 0.1
diff --git a/docker-compose.yml b/docker-compose.yml
index 45d274a..3643a6f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -40,10 +40,15 @@ services:
     restart: unless-stopped
 
   frontend:
-    build: ./frontend
+    build:
+      context: ./frontend
+      args:
+        NEXT_PUBLIC_API_URL: http://localhost:8001
     container_name: audio_classifier_ui
     environment:
-      NEXT_PUBLIC_API_URL: http://backend:8000
+      # Use localhost:8001 because the browser (client-side) needs to access the API
+      # The backend is mapped to port 8001 on the host machine
+      NEXT_PUBLIC_API_URL: http://localhost:8001
     ports:
       - "3000:3000"
     depends_on:
diff --git a/frontend/.dockerignore b/frontend/.dockerignore
index 1ea9800..12e9c9f 100644
--- a/frontend/.dockerignore
+++ b/frontend/.dockerignore
@@ -1,7 +1,6 @@
 node_modules
 .next
 .git
-.env.local
 npm-debug.log*
 yarn-debug.log*
 yarn-error.log*
diff --git a/frontend/.env.local b/frontend/.env.local
new file mode 100644
index 0000000..9be0d67
--- /dev/null
+++ b/frontend/.env.local
@@ -0,0 +1 @@
+NEXT_PUBLIC_API_URL=http://localhost:8001
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index ecf7a27..0926f20 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -12,6 +12,10 @@ RUN npm ci
 # Copy application code
 COPY . .
 
+# NEXT_PUBLIC_* values are inlined into the client bundle by `npm run build`, so the API URL is passed as a build argument
+ARG NEXT_PUBLIC_API_URL=http://localhost:8001
+ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL}
+
 # Build the application
 RUN npm run build
 
diff --git a/scripts/download-essentia-models.sh b/scripts/download-essentia-models.sh
index 1eddd37..23a7e3b 100755
--- a/scripts/download-essentia-models.sh
+++ b/scripts/download-essentia-models.sh
@@ -6,7 +6,8 @@
 set -e  # Exit on error
 
 MODELS_DIR="backend/models"
-BASE_URL="https://essentia.upf.edu/models/classification-heads"
+CLASS_HEADS_URL="https://essentia.upf.edu/models/classification-heads"
+EMBEDDINGS_URL="https://essentia.upf.edu/models/feature-extractors/discogs-effnet"
 
 echo "📦 Downloading Essentia models..."
 echo "Models directory: $MODELS_DIR"
@@ -37,15 +38,23 @@ download_model() {
     fi
 }
 
-# Download each model
+# Download embedding model first (required for all classification heads)
+echo ""
+echo "Downloading embedding model..."
+download_model "discogs-effnet-bs64-1.pb" \
+    "$EMBEDDINGS_URL/discogs-effnet-bs64-1.pb"
+
+# Download classification heads
+echo ""
+echo "Downloading classification heads..."
 download_model "mtg_jamendo_genre-discogs-effnet-1.pb" \
-    "$BASE_URL/mtg_jamendo_genre/mtg_jamendo_genre-discogs-effnet-1.pb"
+    "$CLASS_HEADS_URL/mtg_jamendo_genre/mtg_jamendo_genre-discogs-effnet-1.pb"
 
 download_model "mtg_jamendo_moodtheme-discogs-effnet-1.pb" \
-    "$BASE_URL/mtg_jamendo_moodtheme/mtg_jamendo_moodtheme-discogs-effnet-1.pb"
+    "$CLASS_HEADS_URL/mtg_jamendo_moodtheme/mtg_jamendo_moodtheme-discogs-effnet-1.pb"
 
 download_model "mtg_jamendo_instrument-discogs-effnet-1.pb" \
-    "$BASE_URL/mtg_jamendo_instrument/mtg_jamendo_instrument-discogs-effnet-1.pb"
+    "$CLASS_HEADS_URL/mtg_jamendo_instrument/mtg_jamendo_instrument-discogs-effnet-1.pb"
 
 echo ""
 echo "✅ All models downloaded successfully!"