Files
ivatar/ivatar/opentelemetry_config.py
2025-10-22 14:05:44 +02:00

287 lines
11 KiB
Python

"""
OpenTelemetry configuration for ivatar project.
This module provides OpenTelemetry setup and configuration for the ivatar
Django application, including tracing, metrics, and logging integration.
"""
import os
import logging
from opentelemetry import trace, metrics
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.prometheus import PrometheusMetricReader
from opentelemetry.instrumentation.psycopg2 import Psycopg2Instrumentor
from opentelemetry.instrumentation.pymysql import PyMySQLInstrumentor
from opentelemetry.instrumentation.requests import RequestsInstrumentor
from opentelemetry.instrumentation.urllib3 import URLLib3Instrumentor
# Note: Memcached instrumentation not available in OpenTelemetry Python
# Module-wide logger: every setup/diagnostic message in this module is
# emitted through the "ivatar" logger.
logger = logging.getLogger("ivatar")
class OpenTelemetryConfig:
    """
    OpenTelemetry configuration manager for ivatar.

    Handles setup of tracing, metrics, and instrumentation for the Django
    application. Settings are read from environment variables:

    * ``OTEL_EXPORT_ENABLED``: "true", "1" or "yes" (case-insensitive)
      enables data export; anything else (default "false") disables it.
    * ``OTEL_SERVICE_NAME``: service name, default "ivatar".
    * ``OTEL_ENVIRONMENT``: deployment environment, default "development".
    * ``IVATAR_VERSION``: reported service version, default "1.8.0".
    * ``HOSTNAME``: reported service instance id, default "unknown".
    * ``OTEL_EXPORTER_OTLP_ENDPOINT``: OTLP endpoint for traces/metrics.
    * ``OTEL_PROMETHEUS_ENDPOINT``: "host:port" or "port" on which a local
      Prometheus HTTP server is started.

    Every ``setup_*`` method logs and swallows its own failures so that a
    problem in one area does not prevent the others from being configured.
    """

    def __init__(self):
        """Read the environment and build the shared OpenTelemetry resource."""
        self.enabled = True  # Always enable OpenTelemetry instrumentation
        self.export_enabled = self._is_export_enabled()
        self.service_name = self._get_service_name()
        self.environment = self._get_environment()
        # The resource depends on service_name/environment, so build it last.
        self.resource = self._create_resource()

    def _is_export_enabled(self) -> bool:
        """Check if OpenTelemetry data export is enabled via environment variable."""
        return os.environ.get("OTEL_EXPORT_ENABLED", "false").lower() in (
            "true",
            "1",
            "yes",
        )

    def _get_service_name(self) -> str:
        """Get service name from environment or default."""
        return os.environ.get("OTEL_SERVICE_NAME", "ivatar")

    def _get_environment(self) -> str:
        """Get environment name (production, development, etc.)."""
        return os.environ.get("OTEL_ENVIRONMENT", "development")

    def _create_resource(self) -> Resource:
        """Create OpenTelemetry resource with service information."""
        return Resource.create(
            {
                "service.name": self.service_name,
                "service.version": os.environ.get("IVATAR_VERSION", "1.8.0"),
                "service.namespace": "libravatar",
                "deployment.environment": self.environment,
                "service.instance.id": os.environ.get("HOSTNAME", "unknown"),
            }
        )

    def setup_tracing(self) -> None:
        """
        Set up OpenTelemetry tracing.

        When export is disabled this only logs and returns. Otherwise a
        ``TracerProvider`` built from ``self.resource`` is installed and, if
        ``OTEL_EXPORTER_OTLP_ENDPOINT`` is set, a ``BatchSpanProcessor`` with
        an OTLP span exporter is attached to the active provider.

        Exceptions are logged and swallowed.
        """
        try:
            # Only set up tracing if export is enabled
            if not self.export_enabled:
                logger.info("OpenTelemetry tracing disabled (export disabled)")
                return

            # Set up tracer provider
            trace.set_tracer_provider(TracerProvider(resource=self.resource))
            tracer_provider = trace.get_tracer_provider()

            # Configure OTLP exporter if endpoint is provided
            otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
            if otlp_endpoint:
                otlp_exporter = OTLPSpanExporter(endpoint=otlp_endpoint)
                span_processor = BatchSpanProcessor(otlp_exporter)
                tracer_provider.add_span_processor(span_processor)
                logger.info(
                    "OpenTelemetry tracing configured with OTLP endpoint: %s",
                    otlp_endpoint,
                )
            else:
                logger.info("OpenTelemetry tracing configured without OTLP endpoint")
        except Exception as e:
            logger.error("Failed to setup OpenTelemetry tracing: %s", e)
            # Don't disable OpenTelemetry entirely - metrics and instrumentation can still work

    def setup_metrics(self) -> None:
        """
        Set up OpenTelemetry metrics.

        Builds a list of metric readers (OTLP when export is enabled and
        ``OTEL_EXPORTER_OTLP_ENDPOINT`` is set; Prometheus when
        ``OTEL_PROMETHEUS_ENDPOINT`` is set), installs a ``MeterProvider``
        with them, and starts the local Prometheus HTTP server if requested.
        A warning is logged when no reader ends up configured.

        Exceptions are logged and swallowed.
        """
        try:
            # Configure metric readers based on environment
            metric_readers = []

            # Configure OTLP exporter if export is enabled and endpoint is provided
            if self.export_enabled:
                otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
                if otlp_endpoint:
                    otlp_exporter = OTLPMetricExporter(endpoint=otlp_endpoint)
                    metric_reader = PeriodicExportingMetricReader(otlp_exporter)
                    metric_readers.append(metric_reader)
                    logger.info(
                        "OpenTelemetry metrics configured with OTLP endpoint: %s",
                        otlp_endpoint,
                    )

            # For development/local testing, also configure Prometheus HTTP server
            # In production, metrics are scraped by external Prometheus server
            prometheus_endpoint = os.environ.get("OTEL_PROMETHEUS_ENDPOINT")
            # Bound unconditionally so later use never hits an unbound local.
            prometheus_reader = None
            if prometheus_endpoint:
                prometheus_reader = PrometheusMetricReader()
                metric_readers.append(prometheus_reader)

            # Set up meter provider with readers
            meter_provider = MeterProvider(
                resource=self.resource, metric_readers=metric_readers
            )

            # Only set meter provider if it's not already set
            # NOTE(review): opentelemetry-api may only log a warning (not
            # raise) when a provider is already set - confirm whether this
            # except branch is reachable with the pinned version.
            try:
                metrics.set_meter_provider(meter_provider)
            except Exception as e:
                if "Overriding of current MeterProvider is not allowed" in str(e):
                    logger.warning("MeterProvider already set, using existing provider")
                    # Get the existing meter provider and add our readers
                    existing_provider = metrics.get_meter_provider()
                    if hasattr(existing_provider, "add_metric_reader"):
                        for reader in metric_readers:
                            existing_provider.add_metric_reader(reader)
                else:
                    raise

            # Start Prometheus HTTP server for local development (if configured)
            if prometheus_reader is not None:
                self._start_prometheus_server(prometheus_reader, prometheus_endpoint)
                logger.info(
                    "OpenTelemetry metrics configured with Prometheus endpoint: %s",
                    prometheus_endpoint,
                )

            if not metric_readers:
                logger.warning(
                    "No metric readers configured - metrics will not be exported"
                )
        except Exception as e:
            logger.error("Failed to setup OpenTelemetry metrics: %s", e)
            # Don't disable OpenTelemetry entirely - tracing and instrumentation can still work

    @staticmethod
    def _parse_prometheus_endpoint(endpoint: str) -> tuple:
        """
        Split a Prometheus endpoint string into ``(host, port)``.

        Accepts "port" (host defaults to "0.0.0.0"), "host:port", and
        bracketed IPv6 such as "[::1]:9464". The port is taken from the text
        after the last colon.

        Raises:
            ValueError: if the port part is not an integer.
        """
        host, separator, port = endpoint.rpartition(":")
        if not separator:
            # No colon at all: the whole string is the port.
            return "0.0.0.0", int(endpoint)
        if host.startswith("[") and host.endswith("]"):
            # Strip the brackets of an IPv6 literal.
            host = host[1:-1]
        return host, int(port)

    def _start_prometheus_server(
        self, prometheus_reader: PrometheusMetricReader, endpoint: str
    ) -> None:
        """
        Start Prometheus HTTP server for metrics endpoint.

        Args:
            prometheus_reader: reader whose collector is registered with the
                default ``prometheus_client`` registry.
            endpoint: "port", "host:port" or "[ipv6]:port".

        An "address already in use" error is logged as a warning, since it
        suggests a server is already running on that endpoint. All other
        failures are logged as errors. Nothing is raised.
        """
        # Local import: only needed here, and keeps the file's import block untouched.
        import errno

        try:
            from prometheus_client import start_http_server, REGISTRY

            # Parse endpoint to get host and port
            host, port = self._parse_prometheus_endpoint(endpoint)

            # Register the PrometheusMetricReader collector with prometheus_client
            # NOTE(review): the reader may already register its collector on
            # construction in some exporter versions - confirm this explicit
            # registration (and the private attribute access) is still needed.
            REGISTRY.register(prometheus_reader._collector)

            # Start HTTP server
            start_http_server(port, addr=host)
            logger.info("Prometheus metrics server started on %s:%s", host, port)
        except OSError as e:
            # errno.EADDRINUSE is the portable spelling; a literal 98 is Linux-only.
            if e.errno == errno.EADDRINUSE:
                logger.warning(
                    "Prometheus metrics server already running on %s", endpoint
                )
            else:
                logger.error("Failed to start Prometheus metrics server: %s", e)
                # Don't disable OpenTelemetry entirely - metrics can still be exported via OTLP
        except Exception as e:
            logger.error("Failed to start Prometheus metrics server: %s", e)
            # Don't disable OpenTelemetry entirely - metrics can still be exported via OTLP

    def setup_instrumentation(self) -> None:
        """
        Set up OpenTelemetry instrumentation for various libraries.

        Instruments the psycopg2 and PyMySQL database drivers and the
        requests and urllib3 HTTP clients. Each instrumentor is run in
        isolation, so a failure in one is logged without preventing the
        others from being enabled. A group's "enabled" message is only logged
        when every instrumentor in that group succeeded.
        """
        # Django instrumentation - TEMPORARILY DISABLED TO TEST HEADER ISSUE
        # DjangoInstrumentor().instrument()
        # logger.info("Django instrumentation enabled")
        instrumentor_groups = (
            ("Database", (Psycopg2Instrumentor, PyMySQLInstrumentor)),
            ("HTTP client", (RequestsInstrumentor, URLLib3Instrumentor)),
        )
        for label, instrumentor_classes in instrumentor_groups:
            group_ok = True
            for instrumentor_class in instrumentor_classes:
                try:
                    instrumentor_class().instrument()
                except Exception as e:
                    group_ok = False
                    logger.error(
                        "Failed to setup OpenTelemetry instrumentation: %s", e
                    )
                    # Don't disable OpenTelemetry entirely - tracing and metrics can still work
            if group_ok:
                logger.info("%s instrumentation enabled", label)
        # Note: Memcached instrumentation not available in OpenTelemetry Python
        # Cache operations will be traced through Django instrumentation

    def get_tracer(self, name: str) -> trace.Tracer:
        """Get a tracer instance."""
        return trace.get_tracer(name)

    def get_meter(self, name: str) -> metrics.Meter:
        """Get a meter instance."""
        return metrics.get_meter(name)
# Global OpenTelemetry configuration instance (lazy-loaded)
# _ot_config stays None until get_ot_config() creates the shared
# OpenTelemetryConfig on first use.
_ot_config = None
# _ot_initialized is set to True by setup_opentelemetry() once setup has
# completed, so that later calls return early instead of repeating it.
_ot_initialized = False
def get_ot_config():
    """Return the shared OpenTelemetryConfig, creating it on the first call."""
    global _ot_config
    if _ot_config is not None:
        # Already built by an earlier call; reuse it.
        return _ot_config
    _ot_config = OpenTelemetryConfig()
    return _ot_config
def setup_opentelemetry() -> None:
    """
    Configure OpenTelemetry for the ivatar application.

    Intended to be called during Django application startup. Runs the
    tracing, metrics and instrumentation setup of the shared configuration
    object in that order. Once a run has completed with the configuration
    enabled, the module-level flag is set and later calls return immediately.
    """
    global _ot_initialized
    if _ot_initialized:
        logger.debug("OpenTelemetry already initialized, skipping setup")
        return

    logger.info("Setting up OpenTelemetry...")
    config = get_ot_config()
    config.setup_tracing()
    config.setup_metrics()
    config.setup_instrumentation()

    if not config.enabled:
        # Leave the flag unset so a later call can try again.
        logger.info("OpenTelemetry setup failed")
        return

    completion_message = (
        "OpenTelemetry setup completed successfully (export enabled)"
        if config.export_enabled
        else "OpenTelemetry setup completed successfully (export disabled)"
    )
    logger.info(completion_message)
    _ot_initialized = True
def get_tracer(name: str) -> trace.Tracer:
    """Return a tracer for ``name`` obtained through the shared configuration."""
    config = get_ot_config()
    return config.get_tracer(name)
def get_meter(name: str) -> metrics.Meter:
    """Return a meter for ``name`` obtained through the shared configuration."""
    config = get_ot_config()
    return config.get_meter(name)
def is_enabled() -> bool:
    """
    Report whether OpenTelemetry is enabled.

    Instrumentation is unconditionally on, so this always returns True.
    """
    return True
def is_export_enabled() -> bool:
    """Report whether the shared configuration has data export turned on."""
    config = get_ot_config()
    return config.export_enabled