Enhance the version endpoint and fix OTEL deployment

This commit is contained in:
Oliver Falk
2025-10-17 14:49:10 +02:00
parent 25e9e489c3
commit 6db3450b20
6 changed files with 140 additions and 55 deletions

View File

@@ -316,13 +316,6 @@ ENABLE_MALICIOUS_CONTENT_SCAN = True
# Logging configuration - can be overridden in local config
# Example: LOGS_DIR = "/var/log/ivatar" # For production deployments
# OpenTelemetry feature flag - can be disabled for F/LOSS deployments
ENABLE_OPENTELEMETRY = os.environ.get("ENABLE_OPENTELEMETRY", "false").lower() in (
"true",
"1",
"yes",
)
# This MUST BE THE LAST!
if os.path.isfile(os.path.join(BASE_DIR, "config_local.py")):
from config_local import * # noqa # flake8: noqa # NOQA # pragma: no cover

View File

@@ -129,6 +129,9 @@ class OpenTelemetryConfig:
)
metrics.set_meter_provider(meter_provider)
# Start Prometheus HTTP server for metrics endpoint
self._start_prometheus_server(prometheus_reader, prometheus_endpoint)
logger.info(
f"OpenTelemetry metrics configured with Prometheus endpoint: {prometheus_endpoint}"
)
@@ -137,6 +140,33 @@ class OpenTelemetryConfig:
logger.error(f"Failed to setup OpenTelemetry metrics: {e}")
self.enabled = False
def _start_prometheus_server(
    self, prometheus_reader: PrometheusMetricReader, endpoint: str
) -> None:
    """Start Prometheus HTTP server for metrics endpoint.

    Args:
        prometheus_reader: Reader whose collector is registered with the
            prometheus_client default ``REGISTRY``.
        endpoint: Either ``"host:port"`` (a bracketed IPv6 host such as
            ``"[::1]:9464"`` is accepted) or a bare port number, in which
            case the server binds to ``0.0.0.0``.

    Nothing is raised: any failure (prometheus_client missing, endpoint not
    parsable, registration or bind error) is logged and ``self.enabled`` is
    set to ``False``.
    """
    try:
        from prometheus_client import start_http_server, REGISTRY

        # Parse endpoint to get host and port. Split on the LAST colon so
        # that the colons inside an IPv6 literal stay part of the host.
        host, separator, port_text = endpoint.rpartition(":")
        if not separator:
            # Bare port: listen on all IPv4 interfaces
            host = "0.0.0.0"
        elif host.startswith("[") and host.endswith("]"):
            # "[::1]:9464" -> "::1"
            host = host[1:-1]
        port = int(port_text)

        # Register the PrometheusMetricReader collector with prometheus_client
        # NOTE(review): _collector is a private attribute of the reader and
        # the reader may already register it itself - confirm against the
        # installed opentelemetry-exporter-prometheus version.
        REGISTRY.register(prometheus_reader._collector)

        # Start HTTP server
        start_http_server(port, addr=host)
        logger.info(f"Prometheus metrics server started on {host}:{port}")
    except Exception as e:
        # Best effort: a broken metrics endpoint must not abort the caller
        logger.error(f"Failed to start Prometheus metrics server: {e}")
        self.enabled = False
def setup_instrumentation(self) -> None:
"""Set up OpenTelemetry instrumentation for various libraries."""
try:

View File

@@ -311,16 +311,14 @@ DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
from config import * # pylint: disable=wildcard-import,wrong-import-position,unused-wildcard-import # noqa
# OpenTelemetry setup - must be after config import
# Only setup if feature flag is enabled
# Always setup OpenTelemetry (instrumentation always enabled, export controlled by OTEL_EXPORT_ENABLED)
try:
if getattr(globals(), "ENABLE_OPENTELEMETRY", False):
from ivatar.opentelemetry_config import setup_opentelemetry
setup_opentelemetry()
# Add OpenTelemetry middleware if enabled
# Add OpenTelemetry middleware (always enabled)
MIDDLEWARE.append("ivatar.opentelemetry_middleware.OpenTelemetryMiddleware")
except (ImportError, NameError):
# OpenTelemetry packages not installed or configuration failed
# ENABLE_OPENTELEMETRY not defined (shouldn't happen but be safe)
pass

View File

@@ -40,41 +40,13 @@ from .ivataraccount.models import Photo
from .ivataraccount.models import pil_format, file_format
from .utils import is_trusted_url, mm_ng, resize_animated_gif
# Import OpenTelemetry only if feature flag is enabled
# Import OpenTelemetry (always enabled, export controlled by OTEL_EXPORT_ENABLED)
try:
from django.conf import settings
if getattr(settings, "ENABLE_OPENTELEMETRY", False):
from .opentelemetry_middleware import trace_avatar_operation, get_avatar_metrics
avatar_metrics = get_avatar_metrics()
else:
# Create no-op decorators and metrics when OpenTelemetry is disabled
def trace_avatar_operation(operation_name):
def decorator(func):
return func
return decorator
class NoOpMetrics:
def record_avatar_generated(self, *args, **kwargs):
pass
def record_cache_hit(self, *args, **kwargs):
pass
def record_cache_miss(self, *args, **kwargs):
pass
def record_external_request(self, *args, **kwargs):
pass
def record_file_upload(self, *args, **kwargs):
pass
avatar_metrics = NoOpMetrics()
except ImportError:
# Django not available or settings not loaded
# OpenTelemetry packages not installed
def trace_avatar_operation(operation_name):
def decorator(func):
return func
@@ -845,7 +817,7 @@ class StatsView(TemplateView, JsonResponse):
return JsonResponse(retval)
# Thread-safe version cache
# Thread-safe version cache - cached indefinitely since container restarts on changes
_version_cache = None
_version_cache_lock = threading.Lock()
@@ -889,15 +861,29 @@ def _get_git_info_from_files():
branch_name = "detached"
# Try to get commit date from git log file (if available)
# Optimize: read only the last line instead of entire file
commit_date = None
log_file = path.join(git_dir, "logs", "HEAD")
if path.exists(log_file):
try:
with open(log_file, "r") as f:
# Read last line to get most recent commit info
lines = f.readlines()
if lines:
last_line = lines[-1].strip()
with open(log_file, "rb") as f:
# Seek to end and read backwards to find last line
f.seek(0, 2) # Seek to end
file_size = f.tell()
# Read backwards in chunks to find the last line
chunk_size = min(1024, file_size)
f.seek(max(0, file_size - chunk_size))
chunk = f.read().decode("utf-8", errors="ignore")
# Find the last newline
last_newline = chunk.rfind("\n")
if last_newline != -1:
last_line = chunk[last_newline + 1:].strip()
else:
last_line = chunk.strip()
if last_line:
# Git log format: <old_hash> <new_hash> <author> <timestamp> <timezone> <message>
parts = last_line.split("\t")
if len(parts) >= 2:
@@ -910,7 +896,7 @@ def _get_git_info_from_files():
commit_date = datetime.datetime.fromtimestamp(
timestamp
).strftime("%Y-%m-%d %H:%M:%S %z")
except (ValueError, IndexError):
except (ValueError, IndexError, UnicodeDecodeError):
pass
# Fallback: try to get date from commit object if available
@@ -942,6 +928,7 @@ def _get_git_info_from_files():
def _get_cached_version_info():
"""
Get cached version information, loading it if not available
Since containers restart on content changes, cache indefinitely
"""
global _version_cache

View File

@@ -35,6 +35,7 @@ opentelemetry-instrumentation-urllib3>=0.42b0
opentelemetry-sdk>=1.20.0
Pillow
pip
prometheus-client>=0.20.0
psycopg2-binary
py3dns
pydocstyle

View File

@@ -20,6 +20,7 @@ import argparse
import json
import random
import ssl
import subprocess
import sys
import tempfile
import time
@@ -57,6 +58,52 @@ def colored_print(message: str, color: str = Colors.NC) -> None:
print(f"{color}{message}{Colors.NC}")
def get_current_commit_hash() -> Optional[str]:
    """
    Return the commit hash that HEAD points to, as reported by git.

    Returns:
        The whitespace-stripped output of ``git rev-parse HEAD``, or None
        when git exits with a non-zero status or the git executable cannot
        be found.
    """
    rev_parse_cmd = ["git", "rev-parse", "HEAD"]
    try:
        completed = subprocess.run(
            rev_parse_cmd, capture_output=True, text=True, check=True
        )
    except FileNotFoundError:
        # git is not installed / not on PATH
        return None
    except subprocess.CalledProcessError:
        # git ran but reported failure
        return None
    head_hash = completed.stdout
    return head_hash.strip()
def is_commit_newer_or_equal(commit1: str, commit2: str) -> Optional[bool]:
    """
    Check if commit1 is newer than or equal to commit2 in git history.

    "Newer or equal" is decided with ``git merge-base --is-ancestor``:
    commit1 is newer or equal when commit2 is an ancestor of commit1.

    Args:
        commit1: Commit to classify (hash or any revision git accepts).
        commit2: Commit to compare against.

    Returns:
        True if commit1 is newer or equal to commit2
        False if commit1 is older than commit2
        None if comparison fails (neither commit is an ancestor of the
        other, git reports an error for both checks, or the git executable
        cannot be found)
    """

    def _is_ancestor(ancestor: str, descendant: str) -> bool:
        # Exit status 0 means "ancestor" is reachable from "descendant";
        # every non-zero status is treated as "not an ancestor".
        completed = subprocess.run(
            ["git", "merge-base", "--is-ancestor", ancestor, descendant],
            capture_output=True,
            check=False,
        )
        return completed.returncode == 0

    try:
        # commit2 is an ancestor of commit1 -> commit1 is newer or equal
        if _is_ancestor(commit2, commit1):
            return True
        # Otherwise try the reverse: commit1 is an ancestor of commit2,
        # which means commit1 is older
        if _is_ancestor(commit1, commit2):
            return False
    except FileNotFoundError:
        # git is not available; same handling as get_current_commit_hash()
        return None
    # If both checks fail, we can't determine the relationship
    return None
def make_request(
url: str,
method: str = "GET",
@@ -289,14 +336,43 @@ def test_deployment(
)
# Display version information
commit_hash = version_info.get("commit_hash", "Unknown")
deployed_commit = version_info.get("commit_hash", "Unknown")
branch = version_info.get("branch", "Unknown")
version = version_info.get("version", "Unknown")
colored_print(f"Deployed commit: {commit_hash}", Colors.BLUE)
colored_print(f"Deployed commit: {deployed_commit}", Colors.BLUE)
colored_print(f"Deployed branch: {branch}", Colors.BLUE)
colored_print(f"Deployed version: {version}", Colors.BLUE)
# Compare the deployed commit with the HEAD commit of the local checkout
current_commit = get_current_commit_hash()
if current_commit and deployed_commit != "Unknown":
if deployed_commit == current_commit:
colored_print(
"✅ Exact version match - deployment is up to date!",
Colors.GREEN,
)
else:
# Check if deployed version is newer
comparison = is_commit_newer_or_equal(
deployed_commit, current_commit
)
if comparison is True:
colored_print(
" Note: A newer version is already deployed (this is fine!)",
Colors.YELLOW,
)
elif comparison is False:
colored_print(
"⚠️ Warning: Deployed version appears to be older than expected",
Colors.YELLOW,
)
else:
colored_print(
"⚠️ Warning: Could not determine version relationship",
Colors.YELLOW,
)
# Run functionality tests
colored_print("Running basic functionality tests...", Colors.YELLOW)