diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7c469bf..2556f46 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -11,56 +11,21 @@ cache: variables: PIP_CACHE_DIR: .pipcache -# Test without OpenTelemetry (baseline testing) -test_without_opentelemetry: - stage: build - services: - - postgres:latest - variables: - POSTGRES_DB: django_db_no_otel - POSTGRES_USER: django_user - POSTGRES_PASSWORD: django_password - POSTGRES_HOST: postgres - DATABASE_URL: "postgres://django_user:django_password@postgres/django_db_no_otel" - PYTHONUNBUFFERED: 1 - # Ensure OpenTelemetry is disabled - ENABLE_OPENTELEMETRY: "false" - OTEL_ENABLED: "false" - before_script: - - virtualenv -p python3 /tmp/.virtualenv - - source /tmp/.virtualenv/bin/activate - - pip install -U pip - - pip install Pillow - - pip install -r requirements.txt - - pip install pycco - script: - - source /tmp/.virtualenv/bin/activate - - echo 'from ivatar.settings import TEMPLATES' > config_local.py - - echo 'TEMPLATES[0]["OPTIONS"]["debug"] = True' >> config_local.py - - echo "DEBUG = True" >> config_local.py - - echo "from config import CACHES" >> config_local.py - - echo "CACHES['default'] = CACHES['filesystem']" >> config_local.py - - python manage.py sqldsn - - python manage.py collectstatic --noinput - - echo "Running tests without OpenTelemetry..." - - ./scripts/run_tests_no_ot.sh - -# Test with OpenTelemetry enabled and measure coverage -test_with_opentelemetry_and_coverage: +# Test with OpenTelemetry instrumentation (always enabled, export disabled in CI) +test_and_coverage: stage: build coverage: "/^TOTAL.*\\s+(\\d+\\%)$/" services: - postgres:latest variables: - POSTGRES_DB: django_db_with_otel + POSTGRES_DB: django_db POSTGRES_USER: django_user POSTGRES_PASSWORD: django_password POSTGRES_HOST: postgres - DATABASE_URL: "postgres://django_user:django_password@postgres/django_db_with_otel" + DATABASE_URL: "postgres://django_user:django_password@postgres/django_db" PYTHONUNBUFFERED: 1 - # Enable OpenTelemetry for comprehensive testing - ENABLE_OPENTELEMETRY: "true" - OTEL_ENABLED: "true" + # OpenTelemetry instrumentation always enabled, export controlled by OTEL_EXPORT_ENABLED + OTEL_EXPORT_ENABLED: "false" # Disable export in CI to avoid external dependencies OTEL_SERVICE_NAME: "ivatar-ci" OTEL_ENVIRONMENT: "ci" before_script: @@ -82,7 +47,7 @@ test_with_opentelemetry_and_coverage: - echo "CACHES['default'] = CACHES['filesystem']" >> config_local.py - python manage.py sqldsn - python manage.py collectstatic --noinput - - echo "Running tests with OpenTelemetry enabled and measuring coverage..." + - echo "Running tests with OpenTelemetry instrumentation enabled..." - coverage run --source . scripts/run_tests_with_coverage.py - coverage report --fail-under=70 - coverage html @@ -113,7 +78,7 @@ pycco: pages: stage: deploy dependencies: - - test_with_opentelemetry_and_coverage + - test_and_coverage - pycco script: - mv htmlcov/ public/ diff --git a/README.md b/README.md index 755b644..293ec92 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,50 @@ - [Coverage HTML report](http://oliver.git.linux-kernel.at/ivatar) - [Code documentation (autogenerated, pycco)](http://oliver.git.linux-kernel.at/ivatar/pycco/) +# Environment Variables + +## OpenTelemetry Configuration + +OpenTelemetry instrumentation is always enabled in ivatar. The following environment variables control the behavior: + +### Core Configuration + +- `OTEL_SERVICE_NAME`: Service name for OpenTelemetry (default: "ivatar") +- `OTEL_ENVIRONMENT`: Deployment environment (default: "production") +- `OTEL_EXPORT_ENABLED`: Enable/disable data export (default: "false") + - Set to "true" to enable sending telemetry data to external collectors + - Set to "false" to disable export (instrumentation still active) + +### Export Configuration + +- `OTEL_EXPORTER_OTLP_ENDPOINT`: OTLP endpoint for traces and metrics export + - Example: "http://localhost:4317" (gRPC) or "http://localhost:4318" (HTTP) +- `OTEL_PROMETHEUS_ENDPOINT`: Prometheus metrics endpoint (default: "0.0.0.0:9464") + +### Legacy Configuration (Deprecated) + +- `ENABLE_OPENTELEMETRY`: Legacy flag, no longer used (instrumentation always enabled) +- `OTEL_ENABLED`: Legacy flag, no longer used (instrumentation always enabled) + +## Example Configurations + +### Development (Export Disabled) + +```bash +export OTEL_EXPORT_ENABLED=false +export OTEL_SERVICE_NAME=ivatar-dev +export OTEL_ENVIRONMENT=development +``` + +### Production (Export Enabled) + +```bash +export OTEL_EXPORT_ENABLED=true +export OTEL_SERVICE_NAME=ivatar +export OTEL_ENVIRONMENT=production +export OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317 +``` + # Testing ## Running Tests diff --git a/config.py b/config.py index 8d52644..e4d31bf 100644 --- a/config.py +++ b/config.py @@ -35,6 +35,9 @@ MIDDLEWARE.extend( ] ) +# Add OpenTelemetry middleware (always enabled now) +MIDDLEWARE.insert(0, "ivatar.opentelemetry_middleware.OpenTelemetryMiddleware") + # Add OpenTelemetry middleware only if feature flag is enabled # Note: This will be checked at runtime, not at import time MIDDLEWARE.insert( diff --git a/ivatar/opentelemetry_config.py b/ivatar/opentelemetry_config.py index 6a812ae..5e5251e 100644 --- a/ivatar/opentelemetry_config.py +++ b/ivatar/opentelemetry_config.py @@ -37,30 +37,19 @@ class OpenTelemetryConfig: """ def __init__(self): - self.enabled = self._is_enabled() + self.enabled = True # Always enable OpenTelemetry instrumentation + self.export_enabled = self._is_export_enabled() self.service_name = self._get_service_name() self.environment = self._get_environment() self.resource = self._create_resource() - def _is_enabled(self) -> bool: - """Check if OpenTelemetry is enabled via environment variable and Django settings.""" - # First check Django settings (for F/LOSS deployments) - try: - from django.conf import settings - from django.core.exceptions import ImproperlyConfigured - - try: - if getattr(settings, "ENABLE_OPENTELEMETRY", False): - return True - except ImproperlyConfigured: - # Django settings not configured yet, fall back to environment variable - pass - except ImportError: - # Django not available yet, fall back to environment variable - pass - - # Then check OpenTelemetry-specific environment variable - return os.environ.get("OTEL_ENABLED", "false").lower() in ("true", "1", "yes") + def _is_export_enabled(self) -> bool: + """Check if OpenTelemetry data export is enabled via environment variable.""" + return os.environ.get("OTEL_EXPORT_ENABLED", "false").lower() in ( + "true", + "1", + "yes", + ) def _get_service_name(self) -> str: """Get service name from environment or default.""" @@ -84,26 +73,27 @@ class OpenTelemetryConfig: def setup_tracing(self) -> None: """Set up OpenTelemetry tracing.""" - if not self.enabled: - logger.info("OpenTelemetry tracing disabled") - return - try: # Set up tracer provider trace.set_tracer_provider(TracerProvider(resource=self.resource)) tracer_provider = trace.get_tracer_provider() - # Configure OTLP exporter if endpoint is provided - otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT") - if otlp_endpoint: - otlp_exporter = OTLPSpanExporter(endpoint=otlp_endpoint) - span_processor = BatchSpanProcessor(otlp_exporter) - tracer_provider.add_span_processor(span_processor) - logger.info( - f"OpenTelemetry tracing configured with OTLP endpoint: {otlp_endpoint}" - ) + # Configure OTLP exporter if export is enabled and endpoint is provided + if self.export_enabled: + otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT") + if otlp_endpoint: + otlp_exporter = OTLPSpanExporter(endpoint=otlp_endpoint) + span_processor = BatchSpanProcessor(otlp_exporter) + tracer_provider.add_span_processor(span_processor) + logger.info( + f"OpenTelemetry tracing configured with OTLP endpoint: {otlp_endpoint}" + ) + else: + logger.info( + "OpenTelemetry tracing configured without OTLP endpoint" + ) else: - logger.info("OpenTelemetry tracing configured without OTLP exporter") + logger.info("OpenTelemetry tracing configured (export disabled)") except Exception as e: logger.error(f"Failed to setup OpenTelemetry tracing: {e}") @@ -111,30 +101,27 @@ class OpenTelemetryConfig: def setup_metrics(self) -> None: """Set up OpenTelemetry metrics.""" - if not self.enabled: - logger.info("OpenTelemetry metrics disabled") - return - try: # Configure metric readers metric_readers = [] - # Configure Prometheus exporter for metrics + # Always configure Prometheus exporter for metrics (for local development) prometheus_endpoint = os.environ.get( "OTEL_PROMETHEUS_ENDPOINT", "0.0.0.0:9464" ) prometheus_reader = PrometheusMetricReader() metric_readers.append(prometheus_reader) - # Configure OTLP exporter if endpoint is provided - otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT") - if otlp_endpoint: - otlp_exporter = OTLPMetricExporter(endpoint=otlp_endpoint) - metric_reader = PeriodicExportingMetricReader(otlp_exporter) - metric_readers.append(metric_reader) - logger.info( - f"OpenTelemetry metrics configured with OTLP endpoint: {otlp_endpoint}" - ) + # Configure OTLP exporter if export is enabled and endpoint is provided + if self.export_enabled: + otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT") + if otlp_endpoint: + otlp_exporter = OTLPMetricExporter(endpoint=otlp_endpoint) + metric_reader = PeriodicExportingMetricReader(otlp_exporter) + metric_readers.append(metric_reader) + logger.info( + f"OpenTelemetry metrics configured with OTLP endpoint: {otlp_endpoint}" + ) # Set up meter provider with readers meter_provider = MeterProvider( @@ -152,10 +139,6 @@ class OpenTelemetryConfig: def setup_instrumentation(self) -> None: """Set up OpenTelemetry instrumentation for various libraries.""" - if not self.enabled: - logger.info("OpenTelemetry instrumentation disabled") - return - try: # Django instrumentation DjangoInstrumentor().instrument() @@ -213,9 +196,12 @@ def setup_opentelemetry() -> None: ot_config.setup_instrumentation() if ot_config.enabled: - logger.info("OpenTelemetry setup completed successfully") + if ot_config.export_enabled: + logger.info("OpenTelemetry setup completed successfully (export enabled)") + else: + logger.info("OpenTelemetry setup completed successfully (export disabled)") else: - logger.info("OpenTelemetry setup skipped (disabled)") + logger.info("OpenTelemetry setup failed") def get_tracer(name: str) -> trace.Tracer: @@ -229,5 +215,10 @@ def get_meter(name: str) -> metrics.Meter: def is_enabled() -> bool: - """Check if OpenTelemetry is enabled.""" - return get_ot_config().enabled + """Check if OpenTelemetry is enabled (always True now).""" + return True + + +def is_export_enabled() -> bool: + """Check if OpenTelemetry data export is enabled.""" + return get_ot_config().export_enabled diff --git a/ivatar/opentelemetry_middleware.py b/ivatar/opentelemetry_middleware.py index 9db81d2..18fe01f 100644 --- a/ivatar/opentelemetry_middleware.py +++ b/ivatar/opentelemetry_middleware.py @@ -35,10 +35,7 @@ class OpenTelemetryMiddleware(MiddlewareMixin): # Don't get metrics instance here - get it lazily in __call__ def __call__(self, request): - if not is_enabled(): - return self.get_response(request) - - # Get metrics instance lazily when OpenTelemetry is enabled + # Get metrics instance lazily if not hasattr(self, "metrics"): self.metrics = get_avatar_metrics() @@ -54,9 +51,6 @@ class OpenTelemetryMiddleware(MiddlewareMixin): def process_request(self, request: HttpRequest) -> None: """Process incoming request and start tracing.""" - if not is_enabled(): - return - # Start span for the request span_name = f"{request.method} {request.path}" span = get_tracer("ivatar.middleware").start_span(span_name) @@ -87,9 +81,6 @@ class OpenTelemetryMiddleware(MiddlewareMixin): self, request: HttpRequest, response: HttpResponse ) -> HttpResponse: """Process response and complete tracing.""" - if not is_enabled(): - return response - span = getattr(request, "_ot_span", None) if not span: return response @@ -228,9 +219,6 @@ def trace_file_upload(operation_name: str): def decorator(func): @wraps(func) def wrapper(*args, **kwargs): - if not is_enabled(): - return func(*args, **kwargs) - tracer = get_tracer("ivatar.file_upload") with tracer.start_as_current_span(f"file_upload.{operation_name}") as span: try: @@ -271,9 +259,6 @@ def trace_authentication(operation_name: str): def decorator(func): @wraps(func) def wrapper(*args, **kwargs): - if not is_enabled(): - return func(*args, **kwargs) - tracer = get_tracer("ivatar.auth") with tracer.start_as_current_span(f"auth.{operation_name}") as span: try: @@ -299,9 +284,6 @@ class AvatarMetrics: """ def __init__(self): - if not is_enabled(): - return - self.meter = get_meter("ivatar.avatar") # Create custom metrics @@ -349,9 +331,6 @@ class AvatarMetrics: def record_avatar_request(self, size: str, format_type: str): """Record avatar request.""" - if not is_enabled(): - return - self.avatar_requests.add( 1, { @@ -364,9 +343,6 @@ class AvatarMetrics: self, size: str, format_type: str, source: str = "generated" ): """Record avatar generation.""" - if not is_enabled(): - return - self.avatar_generated.add( 1, { @@ -378,9 +354,6 @@ class AvatarMetrics: def record_cache_hit(self, size: str, format_type: str): """Record cache hit.""" - if not is_enabled(): - return - self.avatar_cache_hits.add( 1, { @@ -391,9 +364,6 @@ class AvatarMetrics: def record_cache_miss(self, size: str, format_type: str): """Record cache miss.""" - if not is_enabled(): - return - self.avatar_cache_misses.add( 1, { @@ -404,9 +374,6 @@ class AvatarMetrics: def record_external_request(self, service: str, status_code: int): """Record external avatar service request.""" - if not is_enabled(): - return - self.external_avatar_requests.add( 1, { @@ -417,9 +384,6 @@ class AvatarMetrics: def record_file_upload(self, file_size: int, content_type: str, success: bool): """Record file upload.""" - if not is_enabled(): - return - self.file_uploads.add( 1, { diff --git a/ivatar/test_opentelemetry.py b/ivatar/test_opentelemetry.py index 685580c..3dd8f51 100644 --- a/ivatar/test_opentelemetry.py +++ b/ivatar/test_opentelemetry.py @@ -39,27 +39,10 @@ class OpenTelemetryConfigTest(TestCase): os.environ.clear() os.environ.update(self.original_env) - def test_config_disabled_by_default(self): - """Test that OpenTelemetry is disabled by default.""" - # Clear environment variables to test default behavior - original_env = os.environ.copy() - os.environ.pop("ENABLE_OPENTELEMETRY", None) - os.environ.pop("OTEL_ENABLED", None) - - try: - config = OpenTelemetryConfig() - # In CI environment, OpenTelemetry might be enabled by CI config - # So we test that the config respects the environment variables - if ( - "OTEL_ENABLED" in original_env - and original_env["OTEL_ENABLED"] == "true" - ): - self.assertTrue(config.enabled) - else: - self.assertFalse(config.enabled) - finally: - os.environ.clear() - os.environ.update(original_env) + def test_config_always_enabled(self): + """Test that OpenTelemetry instrumentation is always enabled.""" + config = OpenTelemetryConfig() + self.assertTrue(config.enabled) def test_config_enabled_with_env_var(self): """Test that OpenTelemetry can be enabled with environment variable.""" @@ -194,22 +177,9 @@ class OpenTelemetryMiddlewareTest(TestCase): reset_avatar_metrics() # Reset global metrics instance self.middleware = OpenTelemetryMiddleware(lambda r: HttpResponse("test")) - @patch("ivatar.opentelemetry_middleware.is_enabled") - def test_middleware_disabled(self, mock_enabled): - """Test middleware when OpenTelemetry is disabled.""" - mock_enabled.return_value = False - - request = self.factory.get("/avatar/test@example.com") - response = self.middleware(request) - - self.assertEqual(response.status_code, 200) - self.assertFalse(hasattr(request, "_ot_span")) - - @patch("ivatar.opentelemetry_middleware.is_enabled") @patch("ivatar.opentelemetry_middleware.get_tracer") - def test_middleware_enabled(self, mock_get_tracer, mock_enabled): + def test_middleware_enabled(self, mock_get_tracer): """Test middleware when OpenTelemetry is enabled.""" - mock_enabled.return_value = True mock_tracer = MagicMock() mock_span = MagicMock() mock_tracer.start_span.return_value = mock_span @@ -224,11 +194,9 @@ class OpenTelemetryMiddlewareTest(TestCase): mock_span.set_attributes.assert_called() mock_span.end.assert_called_once() - @patch("ivatar.opentelemetry_middleware.is_enabled") @patch("ivatar.opentelemetry_middleware.get_tracer") - def test_avatar_request_attributes(self, mock_get_tracer, mock_enabled): + def test_avatar_request_attributes(self, mock_get_tracer): """Test that avatar requests get proper attributes.""" - mock_enabled.return_value = True mock_tracer = MagicMock() mock_span = MagicMock() mock_tracer.start_span.return_value = mock_span @@ -286,23 +254,9 @@ class AvatarMetricsTest(TestCase): """Set up test environment.""" self.metrics = AvatarMetrics() - @patch("ivatar.opentelemetry_middleware.is_enabled") - def test_metrics_disabled(self, mock_enabled): - """Test metrics when OpenTelemetry is disabled.""" - mock_enabled.return_value = False - - # Should not raise any exceptions - self.metrics.record_avatar_generated("128", "png", "generated") - self.metrics.record_cache_hit("128", "png") - self.metrics.record_cache_miss("128", "png") - self.metrics.record_external_request("gravatar", 200) - self.metrics.record_file_upload(1024, "image/png", True) - - @patch("ivatar.opentelemetry_middleware.is_enabled") @patch("ivatar.opentelemetry_middleware.get_meter") - def test_metrics_enabled(self, mock_get_meter, mock_enabled): + def test_metrics_enabled(self, mock_get_meter): """Test metrics when OpenTelemetry is enabled.""" - mock_enabled.return_value = True mock_meter = MagicMock() mock_counter = MagicMock() mock_histogram = MagicMock() @@ -333,11 +287,9 @@ class AvatarMetricsTest(TestCase): class TracingDecoratorsTest(TestCase): """Test tracing decorators.""" - @patch("ivatar.opentelemetry_middleware.is_enabled") @patch("ivatar.opentelemetry_middleware.get_tracer") - def test_trace_avatar_operation(self, mock_get_tracer, mock_enabled): + def test_trace_avatar_operation(self, mock_get_tracer): """Test trace_avatar_operation decorator.""" - mock_enabled.return_value = True mock_tracer = MagicMock() mock_span = MagicMock() mock_tracer.start_as_current_span.return_value.__enter__.return_value = ( @@ -357,11 +309,9 @@ class TracingDecoratorsTest(TestCase): ) mock_span.set_status.assert_called_once() - @patch("ivatar.opentelemetry_middleware.is_enabled") @patch("ivatar.opentelemetry_middleware.get_tracer") - def test_trace_avatar_operation_exception(self, mock_get_tracer, mock_enabled): + def test_trace_avatar_operation_exception(self, mock_get_tracer): """Test trace_avatar_operation decorator with exception.""" - mock_enabled.return_value = True mock_tracer = MagicMock() mock_span = MagicMock() mock_tracer.start_as_current_span.return_value.__enter__.return_value = ( @@ -379,10 +329,8 @@ class TracingDecoratorsTest(TestCase): mock_span.set_status.assert_called_once() mock_span.set_attribute.assert_called_with("error.message", "test error") - @patch("ivatar.opentelemetry_middleware.is_enabled") - def test_trace_file_upload(self, mock_enabled): + def test_trace_file_upload(self): """Test trace_file_upload decorator.""" - mock_enabled.return_value = True @trace_file_upload("test_upload") def test_function(): @@ -391,10 +339,8 @@ class TracingDecoratorsTest(TestCase): result = test_function() self.assertEqual(result, "success") - @patch("ivatar.opentelemetry_middleware.is_enabled") - def test_trace_authentication(self, mock_enabled): + def test_trace_authentication(self): """Test trace_authentication decorator.""" - mock_enabled.return_value = True @trace_authentication("test_auth") def test_function(): @@ -427,24 +373,8 @@ class IntegrationTest(TestCase): def test_is_enabled_function(self): """Test is_enabled function.""" - # Clear environment variables to test default behavior - original_env = os.environ.copy() - os.environ.pop("ENABLE_OPENTELEMETRY", None) - os.environ.pop("OTEL_ENABLED", None) - - try: - # In CI environment, OpenTelemetry might be enabled by CI config - # So we test that the function respects the environment variables - if ( - "OTEL_ENABLED" in original_env - and original_env["OTEL_ENABLED"] == "true" - ): - self.assertTrue(is_enabled()) - else: - self.assertFalse(is_enabled()) - finally: - os.environ.clear() - os.environ.update(original_env) + # OpenTelemetry is now always enabled + self.assertTrue(is_enabled()) # Test enabled with environment variable os.environ["OTEL_ENABLED"] = "true" @@ -467,22 +397,13 @@ class OpenTelemetryDisabledTest(TestCase): os.environ.clear() os.environ.update(self.original_env) - def test_opentelemetry_disabled_by_default(self): - """Test that OpenTelemetry is disabled by default.""" - # In CI environment, OpenTelemetry might be enabled by CI config - # So we test that the function respects the environment variables - if "OTEL_ENABLED" in os.environ and os.environ["OTEL_ENABLED"] == "true": - # In CI with OpenTelemetry enabled, test that it's actually enabled - self.assertTrue(is_enabled()) - else: - # Skip this test in CI environments where OpenTelemetry is enabled - # since we can't properly test "disabled by default" behavior - self.skipTest( - "Cannot test disabled behavior in OpenTelemetry-enabled environment" - ) + def test_opentelemetry_always_enabled(self): + """Test that OpenTelemetry instrumentation is always enabled.""" + # OpenTelemetry instrumentation is now always enabled + self.assertTrue(is_enabled()) - def test_no_op_decorators_work(self): - """Test that no-op decorators work when OpenTelemetry is disabled.""" + def test_decorators_work(self): + """Test that decorators work when OpenTelemetry is enabled.""" @trace_avatar_operation("test_operation") def test_function(): @@ -491,19 +412,19 @@ class OpenTelemetryDisabledTest(TestCase): result = test_function() self.assertEqual(result, "success") - def test_no_op_metrics_work(self): - """Test that no-op metrics work when OpenTelemetry is disabled.""" + def test_metrics_work(self): + """Test that metrics work when OpenTelemetry is enabled.""" avatar_metrics = get_avatar_metrics() # These should not raise exceptions avatar_metrics.record_avatar_generated("80", "png", "uploaded") avatar_metrics.record_cache_hit("80", "png") avatar_metrics.record_cache_miss("80", "png") - avatar_metrics.record_external_request("gravatar", "success") - avatar_metrics.record_file_upload("success", "image/png", True) + avatar_metrics.record_external_request("gravatar", 200) + avatar_metrics.record_file_upload(1024, "image/png", True) - def test_middleware_disabled(self): - """Test that middleware works when OpenTelemetry is disabled.""" + def test_middleware_enabled(self): + """Test that middleware works when OpenTelemetry is enabled.""" factory = RequestFactory() middleware = OpenTelemetryMiddleware(lambda r: HttpResponse("test"))