From 03fa0fb91136cd3a655abd8f2b18c6a70e3af397 Mon Sep 17 00:00:00 2001
From: Oliver Falk
Date: Fri, 24 Oct 2025 11:16:45 +0200
Subject: [PATCH] Make all performance thresholds configurable for dev environment

- Add --response-threshold and --p95-threshold parameters
- Dev environment now uses relaxed thresholds:
  * Avatar generation: 2500ms (vs 1000ms prod)
  * Response time: 2500ms (vs 1000ms prod)
  * 95th percentile: 5000ms (vs 2000ms prod)
- Fixes CI failures due to dev environment being slower than production
- Production maintains strict performance standards
---
 .gitlab-ci.yml               |  2 +-
 scripts/performance_tests.py | 44 +++++++++++++++++++++++-------------
 2 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1440fac..d1cda47 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -166,7 +166,7 @@ performance_tests_dev:
     - pip install requests Pillow prettytable pyLibravatar dnspython py3dns
   script:
     - echo "Running performance tests against dev.libravatar.org..."
-    - python3 scripts/performance_tests.py --base-url $DEV_URL --concurrent-users 5 --avatar-threshold 2500 --output performance_dev.json
+    - python3 scripts/performance_tests.py --base-url $DEV_URL --concurrent-users 5 --avatar-threshold 2500 --response-threshold 2500 --p95-threshold 5000 --output performance_dev.json
   artifacts:
     paths:
       - performance_dev.json
diff --git a/scripts/performance_tests.py b/scripts/performance_tests.py
index b8c7c4d..cab52aa 100644
--- a/scripts/performance_tests.py
+++ b/scripts/performance_tests.py
@@ -380,7 +380,7 @@ class PerformanceTestRunner:
             "results": results,
         }

-    def test_concurrent_load(self) -> None:
+    def test_concurrent_load(self, response_threshold: int = 1000, p95_threshold: int = 2000) -> None:
         """Test concurrent load handling"""
         print("\n=== Concurrent Load Test ===")

@@ -448,10 +448,10 @@ class PerformanceTestRunner:
             # Performance evaluation
             if len(failed_requests) > 0:
                 print(" ⚠️ WARNING: Some operations failed under load")
-            elif p95_duration > 2000:  # 2 seconds
-                print(" ⚠️ WARNING: 95th percentile response time exceeds 2s")
-            elif avg_duration > 1000:  # 1 second
-                print(" ⚠️ CAUTION: Average response time exceeds 1s under load")
+            elif p95_duration > p95_threshold:
+                print(f" ⚠️ WARNING: 95th percentile response time exceeds {p95_threshold}ms")
+            elif avg_duration > response_threshold:
+                print(f" ⚠️ CAUTION: Average response time exceeds {response_threshold}ms under load")
             else:
                 print(" ✅ Load handling is good")
         else:
@@ -831,7 +831,7 @@ class PerformanceTestRunner:

         return first_duration, second_duration

-    def run_all_tests(self, avatar_threshold: int = 1000) -> Optional[Dict[str, Any]]:
+    def run_all_tests(self, avatar_threshold: int = 1000, response_threshold: int = 1000, p95_threshold: int = 2000) -> Optional[Dict[str, Any]]:
         """Run all performance tests"""
         print("Starting Libravatar Performance Tests")
         print("=" * 50)
@@ -846,14 +846,14 @@ class PerformanceTestRunner:
             # Run tests based on mode
             if self.remote_testing:
                 print("🌐 Running remote server tests...")
-                self.test_remote_avatar_performance()
+                self.test_remote_avatar_performance(response_threshold)
             else:
                 print("🏠 Running local tests...")
                 self.test_avatar_generation_performance()
                 self.test_database_performance()

             # Always test concurrent load
-            self.test_concurrent_load()
+            self.test_concurrent_load(response_threshold, p95_threshold)

             # Test cache performance if enabled
             self.test_cache_performance()
@@ -865,7 +865,7 @@ class PerformanceTestRunner:
             print(f"Performance tests completed in {total_duration:.2f}s")

             # Overall assessment
-            self.assess_overall_performance(avatar_threshold)
+            self.assess_overall_performance(avatar_threshold, response_threshold, p95_threshold)

             return self.results

@@ -873,7 +873,7 @@ class PerformanceTestRunner:
             print(f"Performance test failed: {e}")
             return None

-    def test_remote_avatar_performance(self) -> None:
+    def test_remote_avatar_performance(self, response_threshold: int = 1000) -> None:
         """Test avatar generation performance on remote server"""
         print("\n=== Remote Avatar Performance Test ===")

@@ -910,10 +910,10 @@ class PerformanceTestRunner:
             print(f" Success rate: {len(successful_results)}/{len(results)}")

             # Performance thresholds for remote testing
-            if avg_duration > 2000:  # 2 seconds
-                print(" ⚠️ WARNING: Average response time exceeds 2s")
-            elif avg_duration > 1000:  # 1 second
-                print(" ⚠️ CAUTION: Average response time exceeds 1s")
+            if avg_duration > (response_threshold * 2):  # 2x threshold for warning
+                print(f" ⚠️ WARNING: Average response time exceeds {response_threshold * 2}ms")
+            elif avg_duration > response_threshold:
+                print(f" ⚠️ CAUTION: Average response time exceeds {response_threshold}ms")
             else:
                 print(" ✅ Remote avatar performance is good")
         else:
@@ -928,7 +928,7 @@ class PerformanceTestRunner:
             "success_rate": len(successful_results) / len(results) if results else 0,
         }

-    def assess_overall_performance(self, avatar_threshold: int = 1000) -> bool:
+    def assess_overall_performance(self, avatar_threshold: int = 1000, response_threshold: int = 1000, p95_threshold: int = 2000) -> bool:
         """Provide overall performance assessment"""
         print("\n=== OVERALL PERFORMANCE ASSESSMENT ===")

@@ -1006,6 +1006,18 @@ def main() -> Optional[Dict[str, Any]]:
         default=1000,
         help="Avatar generation threshold in ms (default: 1000ms, use 2500 for dev environments)",
     )
+    parser.add_argument(
+        "--response-threshold",
+        type=int,
+        default=1000,
+        help="Response time threshold in ms (default: 1000ms, use 2500 for dev environments)",
+    )
+    parser.add_argument(
+        "--p95-threshold",
+        type=int,
+        default=2000,
+        help="95th percentile threshold in ms (default: 2000ms, use 5000 for dev environments)",
+    )

     args = parser.parse_args()

@@ -1022,7 +1034,7 @@ def main() -> Optional[Dict[str, Any]]:
         remote_testing=remote_testing,
     )

-    results = runner.run_all_tests(args.avatar_threshold)
+    results = runner.run_all_tests(args.avatar_threshold, args.response_threshold, args.p95_threshold)

     if args.output and results:
         import json