Simplify OpenTelemetry approach - always enable instrumentation

- Always enable OpenTelemetry instrumentation, use OTEL_EXPORT_ENABLED for data export control
- Remove conditional checks from middleware, metrics, and decorators
- Simplify CI configuration to use single test job instead of parallel jobs
- Update tests to remove conditional logic and mocking of is_enabled()
- Add comprehensive environment variable documentation to README
- Update config.py to always add OpenTelemetry middleware
- Replace ENABLE_OPENTELEMETRY/OTEL_ENABLED with OTEL_EXPORT_ENABLED

This approach is much simpler and eliminates the complexity of conditional
OpenTelemetry loading while still allowing control over data export.
This commit is contained in:
Oliver Falk
2025-10-17 09:54:59 +02:00
parent b4e10e3ec5
commit 4d86a11728
6 changed files with 128 additions and 240 deletions

View File

@@ -11,56 +11,21 @@ cache:
variables:
PIP_CACHE_DIR: .pipcache
# Test without OpenTelemetry (baseline testing)
test_without_opentelemetry:
stage: build
services:
- postgres:latest
variables:
POSTGRES_DB: django_db_no_otel
POSTGRES_USER: django_user
POSTGRES_PASSWORD: django_password
POSTGRES_HOST: postgres
DATABASE_URL: "postgres://django_user:django_password@postgres/django_db_no_otel"
PYTHONUNBUFFERED: 1
# Ensure OpenTelemetry is disabled
ENABLE_OPENTELEMETRY: "false"
OTEL_ENABLED: "false"
before_script:
- virtualenv -p python3 /tmp/.virtualenv
- source /tmp/.virtualenv/bin/activate
- pip install -U pip
- pip install Pillow
- pip install -r requirements.txt
- pip install pycco
script:
- source /tmp/.virtualenv/bin/activate
- echo 'from ivatar.settings import TEMPLATES' > config_local.py
- echo 'TEMPLATES[0]["OPTIONS"]["debug"] = True' >> config_local.py
- echo "DEBUG = True" >> config_local.py
- echo "from config import CACHES" >> config_local.py
- echo "CACHES['default'] = CACHES['filesystem']" >> config_local.py
- python manage.py sqldsn
- python manage.py collectstatic --noinput
- echo "Running tests without OpenTelemetry..."
- ./scripts/run_tests_no_ot.sh
# Test with OpenTelemetry enabled and measure coverage
test_with_opentelemetry_and_coverage:
# Test with OpenTelemetry instrumentation (always enabled, export disabled in CI)
test_and_coverage:
stage: build
coverage: "/^TOTAL.*\\s+(\\d+\\%)$/"
services:
- postgres:latest
variables:
POSTGRES_DB: django_db_with_otel
POSTGRES_DB: django_db
POSTGRES_USER: django_user
POSTGRES_PASSWORD: django_password
POSTGRES_HOST: postgres
DATABASE_URL: "postgres://django_user:django_password@postgres/django_db_with_otel"
DATABASE_URL: "postgres://django_user:django_password@postgres/django_db"
PYTHONUNBUFFERED: 1
# Enable OpenTelemetry for comprehensive testing
ENABLE_OPENTELEMETRY: "true"
OTEL_ENABLED: "true"
# OpenTelemetry instrumentation always enabled, export controlled by OTEL_EXPORT_ENABLED
OTEL_EXPORT_ENABLED: "false" # Disable export in CI to avoid external dependencies
OTEL_SERVICE_NAME: "ivatar-ci"
OTEL_ENVIRONMENT: "ci"
before_script:
@@ -82,7 +47,7 @@ test_with_opentelemetry_and_coverage:
- echo "CACHES['default'] = CACHES['filesystem']" >> config_local.py
- python manage.py sqldsn
- python manage.py collectstatic --noinput
- echo "Running tests with OpenTelemetry enabled and measuring coverage..."
- echo "Running tests with OpenTelemetry instrumentation enabled..."
- coverage run --source . scripts/run_tests_with_coverage.py
- coverage report --fail-under=70
- coverage html
@@ -113,7 +78,7 @@ pycco:
pages:
stage: deploy
dependencies:
- test_with_opentelemetry_and_coverage
- test_and_coverage
- pycco
script:
- mv htmlcov/ public/

View File

@@ -10,6 +10,50 @@
- [Coverage HTML report](http://oliver.git.linux-kernel.at/ivatar)
- [Code documentation (autogenerated, pycco)](http://oliver.git.linux-kernel.at/ivatar/pycco/)
# Environment Variables
## OpenTelemetry Configuration
OpenTelemetry instrumentation is always enabled in ivatar. The following environment variables control the behavior:
### Core Configuration
- `OTEL_SERVICE_NAME`: Service name for OpenTelemetry (default: "ivatar")
- `OTEL_ENVIRONMENT`: Deployment environment (default: "production")
- `OTEL_EXPORT_ENABLED`: Enable/disable data export (default: "false")
- Set to "true" to enable sending telemetry data to external collectors
- Set to "false" to disable export (instrumentation still active)
### Export Configuration
- `OTEL_EXPORTER_OTLP_ENDPOINT`: OTLP endpoint for traces and metrics export
- Example: "http://localhost:4317" (gRPC) or "http://localhost:4318" (HTTP)
- `OTEL_PROMETHEUS_ENDPOINT`: Prometheus metrics endpoint (default: "0.0.0.0:9464")
### Legacy Configuration (Deprecated)
- `ENABLE_OPENTELEMETRY`: Legacy flag, no longer used (instrumentation always enabled)
- `OTEL_ENABLED`: Legacy flag, no longer used (instrumentation always enabled)
## Example Configurations
### Development (Export Disabled)
```bash
export OTEL_EXPORT_ENABLED=false
export OTEL_SERVICE_NAME=ivatar-dev
export OTEL_ENVIRONMENT=development
```
### Production (Export Enabled)
```bash
export OTEL_EXPORT_ENABLED=true
export OTEL_SERVICE_NAME=ivatar
export OTEL_ENVIRONMENT=production
export OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
```
# Testing
## Running Tests

View File

@@ -35,6 +35,9 @@ MIDDLEWARE.extend(
]
)
# Add OpenTelemetry middleware (always enabled now)
MIDDLEWARE.insert(0, "ivatar.opentelemetry_middleware.OpenTelemetryMiddleware")
# Add OpenTelemetry middleware only if feature flag is enabled
# Note: This will be checked at runtime, not at import time
MIDDLEWARE.insert(

View File

@@ -37,30 +37,19 @@ class OpenTelemetryConfig:
"""
def __init__(self):
self.enabled = self._is_enabled()
self.enabled = True # Always enable OpenTelemetry instrumentation
self.export_enabled = self._is_export_enabled()
self.service_name = self._get_service_name()
self.environment = self._get_environment()
self.resource = self._create_resource()
def _is_enabled(self) -> bool:
"""Check if OpenTelemetry is enabled via environment variable and Django settings."""
# First check Django settings (for F/LOSS deployments)
try:
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
try:
if getattr(settings, "ENABLE_OPENTELEMETRY", False):
return True
except ImproperlyConfigured:
# Django settings not configured yet, fall back to environment variable
pass
except ImportError:
# Django not available yet, fall back to environment variable
pass
# Then check OpenTelemetry-specific environment variable
return os.environ.get("OTEL_ENABLED", "false").lower() in ("true", "1", "yes")
def _is_export_enabled(self) -> bool:
"""Check if OpenTelemetry data export is enabled via environment variable."""
return os.environ.get("OTEL_EXPORT_ENABLED", "false").lower() in (
"true",
"1",
"yes",
)
def _get_service_name(self) -> str:
"""Get service name from environment or default."""
@@ -84,26 +73,27 @@ class OpenTelemetryConfig:
def setup_tracing(self) -> None:
"""Set up OpenTelemetry tracing."""
if not self.enabled:
logger.info("OpenTelemetry tracing disabled")
return
try:
# Set up tracer provider
trace.set_tracer_provider(TracerProvider(resource=self.resource))
tracer_provider = trace.get_tracer_provider()
# Configure OTLP exporter if endpoint is provided
otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
if otlp_endpoint:
otlp_exporter = OTLPSpanExporter(endpoint=otlp_endpoint)
span_processor = BatchSpanProcessor(otlp_exporter)
tracer_provider.add_span_processor(span_processor)
logger.info(
f"OpenTelemetry tracing configured with OTLP endpoint: {otlp_endpoint}"
)
# Configure OTLP exporter if export is enabled and endpoint is provided
if self.export_enabled:
otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
if otlp_endpoint:
otlp_exporter = OTLPSpanExporter(endpoint=otlp_endpoint)
span_processor = BatchSpanProcessor(otlp_exporter)
tracer_provider.add_span_processor(span_processor)
logger.info(
f"OpenTelemetry tracing configured with OTLP endpoint: {otlp_endpoint}"
)
else:
logger.info(
"OpenTelemetry tracing configured without OTLP endpoint"
)
else:
logger.info("OpenTelemetry tracing configured without OTLP exporter")
logger.info("OpenTelemetry tracing configured (export disabled)")
except Exception as e:
logger.error(f"Failed to setup OpenTelemetry tracing: {e}")
@@ -111,30 +101,27 @@ class OpenTelemetryConfig:
def setup_metrics(self) -> None:
"""Set up OpenTelemetry metrics."""
if not self.enabled:
logger.info("OpenTelemetry metrics disabled")
return
try:
# Configure metric readers
metric_readers = []
# Configure Prometheus exporter for metrics
# Always configure Prometheus exporter for metrics (for local development)
prometheus_endpoint = os.environ.get(
"OTEL_PROMETHEUS_ENDPOINT", "0.0.0.0:9464"
)
prometheus_reader = PrometheusMetricReader()
metric_readers.append(prometheus_reader)
# Configure OTLP exporter if endpoint is provided
otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
if otlp_endpoint:
otlp_exporter = OTLPMetricExporter(endpoint=otlp_endpoint)
metric_reader = PeriodicExportingMetricReader(otlp_exporter)
metric_readers.append(metric_reader)
logger.info(
f"OpenTelemetry metrics configured with OTLP endpoint: {otlp_endpoint}"
)
# Configure OTLP exporter if export is enabled and endpoint is provided
if self.export_enabled:
otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
if otlp_endpoint:
otlp_exporter = OTLPMetricExporter(endpoint=otlp_endpoint)
metric_reader = PeriodicExportingMetricReader(otlp_exporter)
metric_readers.append(metric_reader)
logger.info(
f"OpenTelemetry metrics configured with OTLP endpoint: {otlp_endpoint}"
)
# Set up meter provider with readers
meter_provider = MeterProvider(
@@ -152,10 +139,6 @@ class OpenTelemetryConfig:
def setup_instrumentation(self) -> None:
"""Set up OpenTelemetry instrumentation for various libraries."""
if not self.enabled:
logger.info("OpenTelemetry instrumentation disabled")
return
try:
# Django instrumentation
DjangoInstrumentor().instrument()
@@ -213,9 +196,12 @@ def setup_opentelemetry() -> None:
ot_config.setup_instrumentation()
if ot_config.enabled:
logger.info("OpenTelemetry setup completed successfully")
if ot_config.export_enabled:
logger.info("OpenTelemetry setup completed successfully (export enabled)")
else:
logger.info("OpenTelemetry setup completed successfully (export disabled)")
else:
logger.info("OpenTelemetry setup skipped (disabled)")
logger.info("OpenTelemetry setup failed")
def get_tracer(name: str) -> trace.Tracer:
@@ -229,5 +215,10 @@ def get_meter(name: str) -> metrics.Meter:
def is_enabled() -> bool:
"""Check if OpenTelemetry is enabled."""
return get_ot_config().enabled
"""Check if OpenTelemetry is enabled (always True now)."""
return True
def is_export_enabled() -> bool:
"""Check if OpenTelemetry data export is enabled."""
return get_ot_config().export_enabled

View File

@@ -35,10 +35,7 @@ class OpenTelemetryMiddleware(MiddlewareMixin):
# Don't get metrics instance here - get it lazily in __call__
def __call__(self, request):
if not is_enabled():
return self.get_response(request)
# Get metrics instance lazily when OpenTelemetry is enabled
# Get metrics instance lazily
if not hasattr(self, "metrics"):
self.metrics = get_avatar_metrics()
@@ -54,9 +51,6 @@ class OpenTelemetryMiddleware(MiddlewareMixin):
def process_request(self, request: HttpRequest) -> None:
"""Process incoming request and start tracing."""
if not is_enabled():
return
# Start span for the request
span_name = f"{request.method} {request.path}"
span = get_tracer("ivatar.middleware").start_span(span_name)
@@ -87,9 +81,6 @@ class OpenTelemetryMiddleware(MiddlewareMixin):
self, request: HttpRequest, response: HttpResponse
) -> HttpResponse:
"""Process response and complete tracing."""
if not is_enabled():
return response
span = getattr(request, "_ot_span", None)
if not span:
return response
@@ -228,9 +219,6 @@ def trace_file_upload(operation_name: str):
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
if not is_enabled():
return func(*args, **kwargs)
tracer = get_tracer("ivatar.file_upload")
with tracer.start_as_current_span(f"file_upload.{operation_name}") as span:
try:
@@ -271,9 +259,6 @@ def trace_authentication(operation_name: str):
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
if not is_enabled():
return func(*args, **kwargs)
tracer = get_tracer("ivatar.auth")
with tracer.start_as_current_span(f"auth.{operation_name}") as span:
try:
@@ -299,9 +284,6 @@ class AvatarMetrics:
"""
def __init__(self):
if not is_enabled():
return
self.meter = get_meter("ivatar.avatar")
# Create custom metrics
@@ -349,9 +331,6 @@ class AvatarMetrics:
def record_avatar_request(self, size: str, format_type: str):
"""Record avatar request."""
if not is_enabled():
return
self.avatar_requests.add(
1,
{
@@ -364,9 +343,6 @@ class AvatarMetrics:
self, size: str, format_type: str, source: str = "generated"
):
"""Record avatar generation."""
if not is_enabled():
return
self.avatar_generated.add(
1,
{
@@ -378,9 +354,6 @@ class AvatarMetrics:
def record_cache_hit(self, size: str, format_type: str):
"""Record cache hit."""
if not is_enabled():
return
self.avatar_cache_hits.add(
1,
{
@@ -391,9 +364,6 @@ class AvatarMetrics:
def record_cache_miss(self, size: str, format_type: str):
"""Record cache miss."""
if not is_enabled():
return
self.avatar_cache_misses.add(
1,
{
@@ -404,9 +374,6 @@ class AvatarMetrics:
def record_external_request(self, service: str, status_code: int):
"""Record external avatar service request."""
if not is_enabled():
return
self.external_avatar_requests.add(
1,
{
@@ -417,9 +384,6 @@ class AvatarMetrics:
def record_file_upload(self, file_size: int, content_type: str, success: bool):
"""Record file upload."""
if not is_enabled():
return
self.file_uploads.add(
1,
{

View File

@@ -39,27 +39,10 @@ class OpenTelemetryConfigTest(TestCase):
os.environ.clear()
os.environ.update(self.original_env)
def test_config_disabled_by_default(self):
"""Test that OpenTelemetry is disabled by default."""
# Clear environment variables to test default behavior
original_env = os.environ.copy()
os.environ.pop("ENABLE_OPENTELEMETRY", None)
os.environ.pop("OTEL_ENABLED", None)
try:
config = OpenTelemetryConfig()
# In CI environment, OpenTelemetry might be enabled by CI config
# So we test that the config respects the environment variables
if (
"OTEL_ENABLED" in original_env
and original_env["OTEL_ENABLED"] == "true"
):
self.assertTrue(config.enabled)
else:
self.assertFalse(config.enabled)
finally:
os.environ.clear()
os.environ.update(original_env)
def test_config_always_enabled(self):
"""Test that OpenTelemetry instrumentation is always enabled."""
config = OpenTelemetryConfig()
self.assertTrue(config.enabled)
def test_config_enabled_with_env_var(self):
"""Test that OpenTelemetry can be enabled with environment variable."""
@@ -194,22 +177,9 @@ class OpenTelemetryMiddlewareTest(TestCase):
reset_avatar_metrics() # Reset global metrics instance
self.middleware = OpenTelemetryMiddleware(lambda r: HttpResponse("test"))
@patch("ivatar.opentelemetry_middleware.is_enabled")
def test_middleware_disabled(self, mock_enabled):
"""Test middleware when OpenTelemetry is disabled."""
mock_enabled.return_value = False
request = self.factory.get("/avatar/test@example.com")
response = self.middleware(request)
self.assertEqual(response.status_code, 200)
self.assertFalse(hasattr(request, "_ot_span"))
@patch("ivatar.opentelemetry_middleware.is_enabled")
@patch("ivatar.opentelemetry_middleware.get_tracer")
def test_middleware_enabled(self, mock_get_tracer, mock_enabled):
def test_middleware_enabled(self, mock_get_tracer):
"""Test middleware when OpenTelemetry is enabled."""
mock_enabled.return_value = True
mock_tracer = MagicMock()
mock_span = MagicMock()
mock_tracer.start_span.return_value = mock_span
@@ -224,11 +194,9 @@ class OpenTelemetryMiddlewareTest(TestCase):
mock_span.set_attributes.assert_called()
mock_span.end.assert_called_once()
@patch("ivatar.opentelemetry_middleware.is_enabled")
@patch("ivatar.opentelemetry_middleware.get_tracer")
def test_avatar_request_attributes(self, mock_get_tracer, mock_enabled):
def test_avatar_request_attributes(self, mock_get_tracer):
"""Test that avatar requests get proper attributes."""
mock_enabled.return_value = True
mock_tracer = MagicMock()
mock_span = MagicMock()
mock_tracer.start_span.return_value = mock_span
@@ -286,23 +254,9 @@ class AvatarMetricsTest(TestCase):
"""Set up test environment."""
self.metrics = AvatarMetrics()
@patch("ivatar.opentelemetry_middleware.is_enabled")
def test_metrics_disabled(self, mock_enabled):
"""Test metrics when OpenTelemetry is disabled."""
mock_enabled.return_value = False
# Should not raise any exceptions
self.metrics.record_avatar_generated("128", "png", "generated")
self.metrics.record_cache_hit("128", "png")
self.metrics.record_cache_miss("128", "png")
self.metrics.record_external_request("gravatar", 200)
self.metrics.record_file_upload(1024, "image/png", True)
@patch("ivatar.opentelemetry_middleware.is_enabled")
@patch("ivatar.opentelemetry_middleware.get_meter")
def test_metrics_enabled(self, mock_get_meter, mock_enabled):
def test_metrics_enabled(self, mock_get_meter):
"""Test metrics when OpenTelemetry is enabled."""
mock_enabled.return_value = True
mock_meter = MagicMock()
mock_counter = MagicMock()
mock_histogram = MagicMock()
@@ -333,11 +287,9 @@ class AvatarMetricsTest(TestCase):
class TracingDecoratorsTest(TestCase):
"""Test tracing decorators."""
@patch("ivatar.opentelemetry_middleware.is_enabled")
@patch("ivatar.opentelemetry_middleware.get_tracer")
def test_trace_avatar_operation(self, mock_get_tracer, mock_enabled):
def test_trace_avatar_operation(self, mock_get_tracer):
"""Test trace_avatar_operation decorator."""
mock_enabled.return_value = True
mock_tracer = MagicMock()
mock_span = MagicMock()
mock_tracer.start_as_current_span.return_value.__enter__.return_value = (
@@ -357,11 +309,9 @@ class TracingDecoratorsTest(TestCase):
)
mock_span.set_status.assert_called_once()
@patch("ivatar.opentelemetry_middleware.is_enabled")
@patch("ivatar.opentelemetry_middleware.get_tracer")
def test_trace_avatar_operation_exception(self, mock_get_tracer, mock_enabled):
def test_trace_avatar_operation_exception(self, mock_get_tracer):
"""Test trace_avatar_operation decorator with exception."""
mock_enabled.return_value = True
mock_tracer = MagicMock()
mock_span = MagicMock()
mock_tracer.start_as_current_span.return_value.__enter__.return_value = (
@@ -379,10 +329,8 @@ class TracingDecoratorsTest(TestCase):
mock_span.set_status.assert_called_once()
mock_span.set_attribute.assert_called_with("error.message", "test error")
@patch("ivatar.opentelemetry_middleware.is_enabled")
def test_trace_file_upload(self, mock_enabled):
def test_trace_file_upload(self):
"""Test trace_file_upload decorator."""
mock_enabled.return_value = True
@trace_file_upload("test_upload")
def test_function():
@@ -391,10 +339,8 @@ class TracingDecoratorsTest(TestCase):
result = test_function()
self.assertEqual(result, "success")
@patch("ivatar.opentelemetry_middleware.is_enabled")
def test_trace_authentication(self, mock_enabled):
def test_trace_authentication(self):
"""Test trace_authentication decorator."""
mock_enabled.return_value = True
@trace_authentication("test_auth")
def test_function():
@@ -427,24 +373,8 @@ class IntegrationTest(TestCase):
def test_is_enabled_function(self):
"""Test is_enabled function."""
# Clear environment variables to test default behavior
original_env = os.environ.copy()
os.environ.pop("ENABLE_OPENTELEMETRY", None)
os.environ.pop("OTEL_ENABLED", None)
try:
# In CI environment, OpenTelemetry might be enabled by CI config
# So we test that the function respects the environment variables
if (
"OTEL_ENABLED" in original_env
and original_env["OTEL_ENABLED"] == "true"
):
self.assertTrue(is_enabled())
else:
self.assertFalse(is_enabled())
finally:
os.environ.clear()
os.environ.update(original_env)
# OpenTelemetry is now always enabled
self.assertTrue(is_enabled())
# Test enabled with environment variable
os.environ["OTEL_ENABLED"] = "true"
@@ -467,22 +397,13 @@ class OpenTelemetryDisabledTest(TestCase):
os.environ.clear()
os.environ.update(self.original_env)
def test_opentelemetry_disabled_by_default(self):
"""Test that OpenTelemetry is disabled by default."""
# In CI environment, OpenTelemetry might be enabled by CI config
# So we test that the function respects the environment variables
if "OTEL_ENABLED" in os.environ and os.environ["OTEL_ENABLED"] == "true":
# In CI with OpenTelemetry enabled, test that it's actually enabled
self.assertTrue(is_enabled())
else:
# Skip this test in CI environments where OpenTelemetry is enabled
# since we can't properly test "disabled by default" behavior
self.skipTest(
"Cannot test disabled behavior in OpenTelemetry-enabled environment"
)
def test_opentelemetry_always_enabled(self):
"""Test that OpenTelemetry instrumentation is always enabled."""
# OpenTelemetry instrumentation is now always enabled
self.assertTrue(is_enabled())
def test_no_op_decorators_work(self):
"""Test that no-op decorators work when OpenTelemetry is disabled."""
def test_decorators_work(self):
"""Test that decorators work when OpenTelemetry is enabled."""
@trace_avatar_operation("test_operation")
def test_function():
@@ -491,19 +412,19 @@ class OpenTelemetryDisabledTest(TestCase):
result = test_function()
self.assertEqual(result, "success")
def test_no_op_metrics_work(self):
"""Test that no-op metrics work when OpenTelemetry is disabled."""
def test_metrics_work(self):
"""Test that metrics work when OpenTelemetry is enabled."""
avatar_metrics = get_avatar_metrics()
# These should not raise exceptions
avatar_metrics.record_avatar_generated("80", "png", "uploaded")
avatar_metrics.record_cache_hit("80", "png")
avatar_metrics.record_cache_miss("80", "png")
avatar_metrics.record_external_request("gravatar", "success")
avatar_metrics.record_file_upload("success", "image/png", True)
avatar_metrics.record_external_request("gravatar", 200)
avatar_metrics.record_file_upload(1024, "image/png", True)
def test_middleware_disabled(self):
"""Test that middleware works when OpenTelemetry is disabled."""
def test_middleware_enabled(self):
"""Test that middleware works when OpenTelemetry is enabled."""
factory = RequestFactory()
middleware = OpenTelemetryMiddleware(lambda r: HttpResponse("test"))