From a58e812fb6dc4203ed0dbfec295143c83dc869e6 Mon Sep 17 00:00:00 2001
From: Oliver Falk <oliver@linux-kernel.at>
Date: Tue, 23 Sep 2025 15:52:43 +0200
Subject: [PATCH 1/9] Add comprehensive statistics to StatsView

- Implement top 10 viewed avatars with associated email/OpenID details
- Add top 10 queried email addresses and OpenIDs by access count
- Include photo format distribution statistics
- Add user activity metrics (multiple photos, email+OpenID users, avg photos per user)
- Implement Bluesky handles statistics with top handles by access count
- Add average photo size calculation using fast SQL queries
- Include potential duplicate photos detection by format and size
- Use raw SQL for performance optimization on large datasets
- Remove orphaned photos check as requested

The top-10 lists (viewed avatars, queried emails, queried OpenIDs) now return
a consistent data structure with access_count and digest_sha256 fields.
---
 ivatar/views.py | 232 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)

diff --git a/ivatar/views.py b/ivatar/views.py
index d63d082..39f7478 100644
--- a/ivatar/views.py
+++ b/ivatar/views.py
@@ -560,4 +560,236 @@ class StatsView(TemplateView, JsonResponse):
             "avatars": Photo.objects.count(),  # pylint: disable=no-member
         }
 
+        # Top 10 viewed avatars
+        top_photos = Photo.objects.order_by("-access_count")[:10]
+        top_photos_data = []
+        for photo in top_photos:
+            # Find the associated email or openid with highest access count
+            associated_emails = photo.emails.all().order_by("-access_count")
+            associated_openids = photo.openids.all().order_by("-access_count")
+
+            # Get the one with highest access count
+            top_associated = None
+            if associated_emails and associated_openids:
+                if (
+                    associated_emails[0].access_count
+                    >= associated_openids[0].access_count
+                ):
+                    top_associated = associated_emails[0]
+                else:
+                    top_associated = associated_openids[0]
+            elif associated_emails:
+                top_associated = associated_emails[0]
+            elif associated_openids:
+                top_associated = associated_openids[0]
+
+            if top_associated:
+                if hasattr(top_associated, "email"):
+                    # It's a ConfirmedEmail
+                    top_photos_data.append(
+                        {
+                            "email": top_associated.email,
+                            "access_count": top_associated.access_count,
+                            "digest_sha256": top_associated.digest_sha256,
+                        }
+                    )
+                else:
+                    # It's a ConfirmedOpenId
+                    top_photos_data.append(
+                        {
+                            "openid": top_associated.openid,
+                            "access_count": top_associated.access_count,
+                            "digest_sha256": top_associated.digest,
+                        }
+                    )
+
+        retval["top_viewed_avatars"] = top_photos_data
+
+        # Top 10 queried email addresses
+        top_emails = ConfirmedEmail.objects.order_by("-access_count")[:10]
+        top_emails_data = []
+        for email in top_emails:
+            top_emails_data.append(
+                {
+                    "email": email.email,
+                    "access_count": email.access_count,
+                    "digest_sha256": email.digest_sha256,
+                }
+            )
+
+        retval["top_queried_emails"] = top_emails_data
+
+        # Top 10 queried OpenIDs
+        top_openids = ConfirmedOpenId.objects.order_by("-access_count")[:10]
+        top_openids_data = []
+        for openid in top_openids:
+            top_openids_data.append(
+                {
+                    "openid": openid.openid,
+                    "access_count": openid.access_count,
+                    "digest_sha256": openid.digest,
+                }
+            )
+
+        retval["top_queried_openids"] = top_openids_data
+
+        # Photo format distribution
+        from django.db.models import Count
+
+        format_distribution = (
+            Photo.objects.values("format")
+            .annotate(count=Count("format"))
+            .order_by("-count")
+        )
+        retval["photo_format_distribution"] = list(format_distribution)
+
+        # User activity statistics
+        users_with_multiple_photos = (
+            User.objects.annotate(photo_count=Count("photo"))
+            .filter(photo_count__gt=1)
+            .count()
+        )
+        users_with_both_email_and_openid = (
+            User.objects.filter(
+                confirmedemail__isnull=False, confirmedopenid__isnull=False
+            )
+            .distinct()
+            .count()
+        )
+
+        # Calculate average photos per user
+        total_photos = Photo.objects.count()
+        total_users = User.objects.count()
+        avg_photos_per_user = total_photos / total_users if total_users > 0 else 0
+
+        retval["user_activity"] = {
+            "users_with_multiple_photos": users_with_multiple_photos,
+            "users_with_both_email_and_openid": users_with_both_email_and_openid,
+            "average_photos_per_user": round(avg_photos_per_user, 2),
+        }
+
+        # Bluesky handles statistics
+        bluesky_emails = ConfirmedEmail.objects.filter(
+            bluesky_handle__isnull=False
+        ).count()
+        bluesky_openids = ConfirmedOpenId.objects.filter(
+            bluesky_handle__isnull=False
+        ).count()
+        total_bluesky_handles = bluesky_emails + bluesky_openids
+
+        # Top Bluesky handles by access count
+        top_bluesky_emails = ConfirmedEmail.objects.filter(
+            bluesky_handle__isnull=False
+        ).order_by("-access_count")[:5]
+
+        top_bluesky_openids = ConfirmedOpenId.objects.filter(
+            bluesky_handle__isnull=False
+        ).order_by("-access_count")[:5]
+
+        top_bluesky_handles = []
+        for email in top_bluesky_emails:
+            top_bluesky_handles.append(
+                {
+                    "handle": email.bluesky_handle,
+                    "access_count": email.access_count,
+                    "type": "email",
+                }
+            )
+
+        for openid in top_bluesky_openids:
+            top_bluesky_handles.append(
+                {
+                    "handle": openid.bluesky_handle,
+                    "access_count": openid.access_count,
+                    "type": "openid",
+                }
+            )
+
+        # Sort by access count and take top 5
+        top_bluesky_handles.sort(key=lambda x: x["access_count"], reverse=True)
+        top_bluesky_handles = top_bluesky_handles[:5]
+
+        retval["bluesky_handles"] = {
+            "total_bluesky_handles": total_bluesky_handles,
+            "bluesky_emails": bluesky_emails,
+            "bluesky_openids": bluesky_openids,
+            "top_bluesky_handles": top_bluesky_handles,
+        }
+
+        # Average photo size statistics using raw SQL
+        from django.db import connection
+
+        with connection.cursor() as cursor:
+            # SQL to calculate average photo size
+            cursor.execute(
+                """
+                SELECT
+                    COUNT(*) as photo_count,
+                    AVG(LENGTH(data)) as avg_size_bytes
+                FROM ivataraccount_photo
+                WHERE data IS NOT NULL
+            """
+            )
+            result = cursor.fetchone()
+
+            if result and result[0] > 0:
+                photo_count, avg_size_bytes = result
+                avg_size_kb = round(avg_size_bytes / 1024, 2) if avg_size_bytes else 0
+                avg_size_mb = (
+                    round(avg_size_bytes / (1024 * 1024), 2) if avg_size_bytes else 0
+                )
+
+                retval["photo_size_stats"] = {
+                    "average_size_bytes": round(avg_size_bytes, 2)
+                    if avg_size_bytes
+                    else 0,
+                    "average_size_kb": avg_size_kb,
+                    "average_size_mb": avg_size_mb,
+                    "total_photos_analyzed": photo_count,
+                }
+            else:
+                retval["photo_size_stats"] = {
+                    "average_size_bytes": 0,
+                    "average_size_kb": 0,
+                    "average_size_mb": 0,
+                    "total_photos_analyzed": 0,
+                }
+
+        # For potential duplicate photos, we'll check for photos with the same format and size
+        # Note: This is not definitive - different images can have the same format and size
+        # but it's a good indicator of potential duplicates that might warrant investigation
+        with connection.cursor() as cursor:
+            cursor.execute(
+                """
+                SELECT
+                    format,
+                    LENGTH(data) as file_size,
+                    COUNT(*) as count
+                FROM ivataraccount_photo
+                WHERE data IS NOT NULL
+                GROUP BY format, LENGTH(data)
+                HAVING COUNT(*) > 1
+                ORDER BY count DESC
+                LIMIT 10
+            """
+            )
+            duplicate_groups = cursor.fetchall()
+
+            total_potential_duplicate_photos = sum(
+                group[2] for group in duplicate_groups
+            )
+
+            # Convert to list of dictionaries for JSON serialization
+            duplicate_groups_detail = [
+                {"format": group[0], "file_size": group[1], "count": group[2]}
+                for group in duplicate_groups
+            ]
+
+        retval["potential_duplicate_photos"] = {
+            "potential_duplicate_groups": len(duplicate_groups),
+            "total_potential_duplicate_photos": total_potential_duplicate_photos,
+            "potential_duplicate_groups_detail": duplicate_groups_detail,
+            "note": "Potential duplicates are identified by matching file format and size - not definitive duplicates",
+        }
+
         return JsonResponse(retval)

From 9d647fe0752b61e8e9cfaa67addc30e3c1fe9795 Mon Sep 17 00:00:00 2001
From: Oliver Falk <oliver@linux-kernel.at>
Date: Tue, 23 Sep 2025 16:18:29 +0200
Subject: [PATCH 2/9] Add comprehensive tests for StatsView

- Rename test_stats to test_stats_basic: Test basic count statistics
- Add test_stats_comprehensive: Test all new statistics with real data
- Add test_stats_edge_cases: Test edge cases with empty data
- Add test_stats_with_bluesky_handles: Test Bluesky handles functionality
- Add test_stats_photo_duplicates: Test potential duplicate photos detection

All tests cover:
- Top 10 viewed avatars with associated email/OpenID details
- Top 10 queried email addresses and OpenIDs
- Photo format distribution statistics
- User activity metrics (multiple photos, email+OpenID users, avg photos per user)
- Bluesky handles statistics with top handles by access count
- Average photo size calculation using SQL queries
- Potential duplicate photos detection by format and size
- Edge cases and error handling

Tests use valid PNG image data and proper model relationships.
All 5 stats test methods (4 new, 1 renamed) pass successfully.
---
 ivatar/test_views.py | 315 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 313 insertions(+), 2 deletions(-)

diff --git a/ivatar/test_views.py b/ivatar/test_views.py
index 582539b..77019fa 100644
--- a/ivatar/test_views.py
+++ b/ivatar/test_views.py
@@ -68,9 +68,9 @@ class Tester(TestCase):  # pylint: disable=too-many-public-methods
         #   msg_prefix="Why does an invalid hash not redirect to deadbeef?",
         # )
 
-    def test_stats(self):
+    def test_stats_basic(self):
         """
-        Test incorrect digest
+        Test basic stats functionality
         """
         response = self.client.get("/stats/", follow=True)
         self.assertEqual(response.status_code, 200, "unable to fetch stats!")
@@ -84,6 +84,317 @@ class Tester(TestCase):  # pylint: disable=too-many-public-methods
         )
         self.assertEqual(j["avatars"], 0, "avatars count incorrect")
 
+    def test_stats_comprehensive(self):
+        """
+        Test comprehensive stats with actual data
+        """
+        from ivatar.ivataraccount.models import (
+            ConfirmedEmail,
+            ConfirmedOpenId,
+            Photo,
+            UnconfirmedEmail,
+            UnconfirmedOpenId,
+        )
+
+        # Create test data
+        email1 = ConfirmedEmail.objects.create(
+            user=self.user, email="test1@example.com", ip_address="192.168.1.1"
+        )
+        email1.access_count = 100
+        email1.save()
+
+        email2 = ConfirmedEmail.objects.create(
+            user=self.user, email="test2@example.com", ip_address="192.168.1.2"
+        )
+        email2.access_count = 50
+        email2.save()
+
+        openid1 = ConfirmedOpenId.objects.create(
+            user=self.user, openid="http://test1.example.com/", ip_address="192.168.1.3"
+        )
+        openid1.access_count = 75
+        openid1.save()
+
+        # Create photos with valid image data (minimal PNG)
+        # PNG header + minimal data
+        png_data = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00\nIDATx\x9cc```\x00\x00\x00\x04\x00\x01\xdd\x8d\xb4\x1c\x00\x00\x00\x00IEND\xaeB`\x82"
+
+        photo1 = Photo.objects.create(
+            user=self.user, data=png_data, format="png", ip_address="192.168.1.4"
+        )
+        photo1.access_count = 200
+        photo1.save()
+
+        photo2 = Photo.objects.create(
+            user=self.user,
+            data=png_data,  # Same data for testing
+            format="png",  # Same format for testing
+            ip_address="192.168.1.5",
+        )
+        photo2.access_count = 150
+        photo2.save()
+
+        # Associate photos with emails/openids
+        email1.photo = photo1
+        email1.save()
+        email2.photo = photo2
+        email2.save()
+        openid1.photo = photo1
+        openid1.save()
+
+        # Create unconfirmed entries
+        UnconfirmedEmail.objects.create(
+            user=self.user, email="unconfirmed@example.com", ip_address="192.168.1.6"
+        )
+
+        UnconfirmedOpenId.objects.create(
+            user=self.user,
+            openid="http://unconfirmed.example.com/",
+            ip_address="192.168.1.7",
+        )
+
+        # Test the stats endpoint
+        response = self.client.get("/stats/")
+        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
+        j = json.loads(response.content)
+
+        # Test basic counts
+        self.assertEqual(j["users"], 1, "user count incorrect")
+        self.assertEqual(j["mails"], 2, "mails count incorrect")
+        self.assertEqual(j["openids"], 1, "openids count incorrect")
+        self.assertEqual(j["unconfirmed_mails"], 1, "unconfirmed mails count incorrect")
+        self.assertEqual(
+            j["unconfirmed_openids"], 1, "unconfirmed openids count incorrect"
+        )
+        self.assertEqual(j["avatars"], 2, "avatars count incorrect")
+
+        # Test top viewed avatars
+        self.assertIn("top_viewed_avatars", j, "top_viewed_avatars missing")
+        self.assertEqual(
+            len(j["top_viewed_avatars"]), 2, "should have 2 top viewed avatars"
+        )
+        # The top viewed avatar should be the one with highest associated email/openid access count
+        self.assertEqual(
+            j["top_viewed_avatars"][0]["access_count"],
+            100,
+            "top avatar access count incorrect",
+        )
+
+        # Test top queried emails
+        self.assertIn("top_queried_emails", j, "top_queried_emails missing")
+        self.assertEqual(
+            len(j["top_queried_emails"]), 2, "should have 2 top queried emails"
+        )
+        self.assertEqual(
+            j["top_queried_emails"][0]["access_count"],
+            100,
+            "top email access count incorrect",
+        )
+
+        # Test top queried openids
+        self.assertIn("top_queried_openids", j, "top_queried_openids missing")
+        self.assertEqual(
+            len(j["top_queried_openids"]), 1, "should have 1 top queried openid"
+        )
+        self.assertEqual(
+            j["top_queried_openids"][0]["access_count"],
+            75,
+            "top openid access count incorrect",
+        )
+
+        # Test photo format distribution
+        self.assertIn(
+            "photo_format_distribution", j, "photo_format_distribution missing"
+        )
+        formats = {
+            item["format"]: item["count"] for item in j["photo_format_distribution"]
+        }
+        self.assertEqual(formats["png"], 2, "png format count incorrect")
+
+        # Test user activity stats
+        self.assertIn("user_activity", j, "user_activity missing")
+        self.assertEqual(
+            j["user_activity"]["users_with_multiple_photos"],
+            1,
+            "users with multiple photos incorrect",
+        )
+        self.assertEqual(
+            j["user_activity"]["users_with_both_email_and_openid"],
+            1,
+            "users with both email and openid incorrect",
+        )
+        self.assertEqual(
+            j["user_activity"]["average_photos_per_user"],
+            2.0,
+            "average photos per user incorrect",
+        )
+
+        # Test Bluesky handles (should be empty)
+        self.assertIn("bluesky_handles", j, "bluesky_handles missing")
+        self.assertEqual(
+            j["bluesky_handles"]["total_bluesky_handles"],
+            0,
+            "total bluesky handles should be 0",
+        )
+
+        # Test photo size stats
+        self.assertIn("photo_size_stats", j, "photo_size_stats missing")
+        self.assertGreater(
+            j["photo_size_stats"]["average_size_bytes"],
+            0,
+            "average photo size should be > 0",
+        )
+        self.assertEqual(
+            j["photo_size_stats"]["total_photos_analyzed"],
+            2,
+            "total photos analyzed incorrect",
+        )
+
+        # Test potential duplicate photos
+        self.assertIn(
+            "potential_duplicate_photos", j, "potential_duplicate_photos missing"
+        )
+        self.assertEqual(
+            j["potential_duplicate_photos"]["potential_duplicate_groups"],
+            1,
+            "should have 1 duplicate group (same PNG data)",
+        )
+
+    def test_stats_edge_cases(self):
+        """
+        Test edge cases for stats
+        """
+        # Test with no data
+        response = self.client.get("/stats/")
+        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
+        j = json.loads(response.content)
+
+        # All lists should be empty
+        self.assertEqual(
+            len(j["top_viewed_avatars"]), 0, "top_viewed_avatars should be empty"
+        )
+        self.assertEqual(
+            len(j["top_queried_emails"]), 0, "top_queried_emails should be empty"
+        )
+        self.assertEqual(
+            len(j["top_queried_openids"]), 0, "top_queried_openids should be empty"
+        )
+        self.assertEqual(
+            len(j["photo_format_distribution"]),
+            0,
+            "photo_format_distribution should be empty",
+        )
+        self.assertEqual(
+            j["bluesky_handles"]["total_bluesky_handles"],
+            0,
+            "bluesky_handles should be 0",
+        )
+        self.assertEqual(
+            j["photo_size_stats"]["total_photos_analyzed"],
+            0,
+            "photo_size_stats should be 0",
+        )
+        self.assertEqual(
+            j["potential_duplicate_photos"]["potential_duplicate_groups"],
+            0,
+            "potential_duplicate_photos should be 0",
+        )
+
+    def test_stats_with_bluesky_handles(self):
+        """
+        Test stats with Bluesky handles
+        """
+        from ivatar.ivataraccount.models import ConfirmedEmail, ConfirmedOpenId
+
+        # Create email with Bluesky handle
+        email = ConfirmedEmail.objects.create(
+            user=self.user, email="bluesky@example.com", ip_address="192.168.1.1"
+        )
+        email.bluesky_handle = "test.bsky.social"
+        email.access_count = 100
+        email.save()
+
+        # Create OpenID with Bluesky handle
+        openid = ConfirmedOpenId.objects.create(
+            user=self.user,
+            openid="http://bluesky.example.com/",
+            ip_address="192.168.1.2",
+        )
+        openid.bluesky_handle = "another.bsky.social"
+        openid.access_count = 50
+        openid.save()
+
+        response = self.client.get("/stats/")
+        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
+        j = json.loads(response.content)
+
+        # Test Bluesky handles stats
+        self.assertEqual(
+            j["bluesky_handles"]["total_bluesky_handles"],
+            2,
+            "total bluesky handles incorrect",
+        )
+        self.assertEqual(
+            j["bluesky_handles"]["bluesky_emails"], 1, "bluesky emails count incorrect"
+        )
+        self.assertEqual(
+            j["bluesky_handles"]["bluesky_openids"],
+            1,
+            "bluesky openids count incorrect",
+        )
+        self.assertEqual(
+            len(j["bluesky_handles"]["top_bluesky_handles"]),
+            2,
+            "top bluesky handles count incorrect",
+        )
+
+    def test_stats_photo_duplicates(self):
+        """
+        Test potential duplicate photos detection
+        """
+        from ivatar.ivataraccount.models import Photo
+
+        # Create photos with same format and size (potential duplicates)
+        # PNG header + minimal data
+        png_data = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00\nIDATx\x9cc```\x00\x00\x00\x04\x00\x01\xdd\x8d\xb4\x1c\x00\x00\x00\x00IEND\xaeB`\x82"
+
+        Photo.objects.create(
+            user=self.user, data=png_data, format="png", ip_address="192.168.1.1"
+        )
+        Photo.objects.create(
+            user=self.user,
+            data=png_data,  # Same size
+            format="png",  # Same format
+            ip_address="192.168.1.2",
+        )
+        Photo.objects.create(
+            user=self.user,
+            data=png_data,  # Same size but different format
+            format="png",  # Same format for testing
+            ip_address="192.168.1.3",
+        )
+
+        response = self.client.get("/stats/")
+        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
+        j = json.loads(response.content)
+
+        # Should detect potential duplicates
+        self.assertEqual(
+            j["potential_duplicate_photos"]["potential_duplicate_groups"],
+            1,
+            "should have 1 duplicate group",
+        )
+        self.assertEqual(
+            j["potential_duplicate_photos"]["total_potential_duplicate_photos"],
+            3,
+            "should have 3 potential duplicate photos",
+        )
+        self.assertEqual(
+            len(j["potential_duplicate_photos"]["potential_duplicate_groups_detail"]),
+            1,
+            "should have 1 duplicate group detail",
+        )
+
     def test_logout(self):
         """
         Test if logout works correctly

From 4a684f99478dc5c35d659262efc81ff115b288d6 Mon Sep 17 00:00:00 2001
From: Oliver Falk <oliver@linux-kernel.at>
Date: Tue, 23 Sep 2025 16:47:56 +0200
Subject: [PATCH 3/9] Refactor stats tests into separate file with random data

- Add random_ip_address() function to ivatar.utils for generating random IP addresses
- Create separate test_views_stats.py file with StatsTester class
- Move all stats tests from test_views.py to test_views_stats.py
- Update tests to use random_string() for emails and OpenIDs instead of static @example.com values
- Update tests to use random_ip_address() for IP addresses instead of static 192.168.1.x addresses
- Remove stats tests from original test_views.py file
---
 ivatar/test_views.py       | 328 --------------------------------
 ivatar/test_views_stats.py | 379 +++++++++++++++++++++++++++++++++++++
 ivatar/utils.py            |   7 +
 3 files changed, 386 insertions(+), 328 deletions(-)
 create mode 100644 ivatar/test_views_stats.py

diff --git a/ivatar/test_views.py b/ivatar/test_views.py
index 77019fa..2049858 100644
--- a/ivatar/test_views.py
+++ b/ivatar/test_views.py
@@ -7,7 +7,6 @@ import contextlib
 
 # pylint: disable=too-many-lines
 import os
-import json
 import django
 from django.urls import reverse
 from django.test import TestCase
@@ -68,333 +67,6 @@ class Tester(TestCase):  # pylint: disable=too-many-public-methods
         #   msg_prefix="Why does an invalid hash not redirect to deadbeef?",
         # )
 
-    def test_stats_basic(self):
-        """
-        Test basic stats functionality
-        """
-        response = self.client.get("/stats/", follow=True)
-        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
-        j = json.loads(response.content)
-        self.assertEqual(j["users"], 1, "user count incorrect")
-        self.assertEqual(j["mails"], 0, "mails count incorrect")
-        self.assertEqual(j["openids"], 0, "openids count incorrect")
-        self.assertEqual(j["unconfirmed_mails"], 0, "unconfirmed mails count incorrect")
-        self.assertEqual(
-            j["unconfirmed_openids"], 0, "unconfirmed openids count incorrect"
-        )
-        self.assertEqual(j["avatars"], 0, "avatars count incorrect")
-
-    def test_stats_comprehensive(self):
-        """
-        Test comprehensive stats with actual data
-        """
-        from ivatar.ivataraccount.models import (
-            ConfirmedEmail,
-            ConfirmedOpenId,
-            Photo,
-            UnconfirmedEmail,
-            UnconfirmedOpenId,
-        )
-
-        # Create test data
-        email1 = ConfirmedEmail.objects.create(
-            user=self.user, email="test1@example.com", ip_address="192.168.1.1"
-        )
-        email1.access_count = 100
-        email1.save()
-
-        email2 = ConfirmedEmail.objects.create(
-            user=self.user, email="test2@example.com", ip_address="192.168.1.2"
-        )
-        email2.access_count = 50
-        email2.save()
-
-        openid1 = ConfirmedOpenId.objects.create(
-            user=self.user, openid="http://test1.example.com/", ip_address="192.168.1.3"
-        )
-        openid1.access_count = 75
-        openid1.save()
-
-        # Create photos with valid image data (minimal PNG)
-        # PNG header + minimal data
-        png_data = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00\nIDATx\x9cc```\x00\x00\x00\x04\x00\x01\xdd\x8d\xb4\x1c\x00\x00\x00\x00IEND\xaeB`\x82"
-
-        photo1 = Photo.objects.create(
-            user=self.user, data=png_data, format="png", ip_address="192.168.1.4"
-        )
-        photo1.access_count = 200
-        photo1.save()
-
-        photo2 = Photo.objects.create(
-            user=self.user,
-            data=png_data,  # Same data for testing
-            format="png",  # Same format for testing
-            ip_address="192.168.1.5",
-        )
-        photo2.access_count = 150
-        photo2.save()
-
-        # Associate photos with emails/openids
-        email1.photo = photo1
-        email1.save()
-        email2.photo = photo2
-        email2.save()
-        openid1.photo = photo1
-        openid1.save()
-
-        # Create unconfirmed entries
-        UnconfirmedEmail.objects.create(
-            user=self.user, email="unconfirmed@example.com", ip_address="192.168.1.6"
-        )
-
-        UnconfirmedOpenId.objects.create(
-            user=self.user,
-            openid="http://unconfirmed.example.com/",
-            ip_address="192.168.1.7",
-        )
-
-        # Test the stats endpoint
-        response = self.client.get("/stats/")
-        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
-        j = json.loads(response.content)
-
-        # Test basic counts
-        self.assertEqual(j["users"], 1, "user count incorrect")
-        self.assertEqual(j["mails"], 2, "mails count incorrect")
-        self.assertEqual(j["openids"], 1, "openids count incorrect")
-        self.assertEqual(j["unconfirmed_mails"], 1, "unconfirmed mails count incorrect")
-        self.assertEqual(
-            j["unconfirmed_openids"], 1, "unconfirmed openids count incorrect"
-        )
-        self.assertEqual(j["avatars"], 2, "avatars count incorrect")
-
-        # Test top viewed avatars
-        self.assertIn("top_viewed_avatars", j, "top_viewed_avatars missing")
-        self.assertEqual(
-            len(j["top_viewed_avatars"]), 2, "should have 2 top viewed avatars"
-        )
-        # The top viewed avatar should be the one with highest associated email/openid access count
-        self.assertEqual(
-            j["top_viewed_avatars"][0]["access_count"],
-            100,
-            "top avatar access count incorrect",
-        )
-
-        # Test top queried emails
-        self.assertIn("top_queried_emails", j, "top_queried_emails missing")
-        self.assertEqual(
-            len(j["top_queried_emails"]), 2, "should have 2 top queried emails"
-        )
-        self.assertEqual(
-            j["top_queried_emails"][0]["access_count"],
-            100,
-            "top email access count incorrect",
-        )
-
-        # Test top queried openids
-        self.assertIn("top_queried_openids", j, "top_queried_openids missing")
-        self.assertEqual(
-            len(j["top_queried_openids"]), 1, "should have 1 top queried openid"
-        )
-        self.assertEqual(
-            j["top_queried_openids"][0]["access_count"],
-            75,
-            "top openid access count incorrect",
-        )
-
-        # Test photo format distribution
-        self.assertIn(
-            "photo_format_distribution", j, "photo_format_distribution missing"
-        )
-        formats = {
-            item["format"]: item["count"] for item in j["photo_format_distribution"]
-        }
-        self.assertEqual(formats["png"], 2, "png format count incorrect")
-
-        # Test user activity stats
-        self.assertIn("user_activity", j, "user_activity missing")
-        self.assertEqual(
-            j["user_activity"]["users_with_multiple_photos"],
-            1,
-            "users with multiple photos incorrect",
-        )
-        self.assertEqual(
-            j["user_activity"]["users_with_both_email_and_openid"],
-            1,
-            "users with both email and openid incorrect",
-        )
-        self.assertEqual(
-            j["user_activity"]["average_photos_per_user"],
-            2.0,
-            "average photos per user incorrect",
-        )
-
-        # Test Bluesky handles (should be empty)
-        self.assertIn("bluesky_handles", j, "bluesky_handles missing")
-        self.assertEqual(
-            j["bluesky_handles"]["total_bluesky_handles"],
-            0,
-            "total bluesky handles should be 0",
-        )
-
-        # Test photo size stats
-        self.assertIn("photo_size_stats", j, "photo_size_stats missing")
-        self.assertGreater(
-            j["photo_size_stats"]["average_size_bytes"],
-            0,
-            "average photo size should be > 0",
-        )
-        self.assertEqual(
-            j["photo_size_stats"]["total_photos_analyzed"],
-            2,
-            "total photos analyzed incorrect",
-        )
-
-        # Test potential duplicate photos
-        self.assertIn(
-            "potential_duplicate_photos", j, "potential_duplicate_photos missing"
-        )
-        self.assertEqual(
-            j["potential_duplicate_photos"]["potential_duplicate_groups"],
-            1,
-            "should have 1 duplicate group (same PNG data)",
-        )
-
-    def test_stats_edge_cases(self):
-        """
-        Test edge cases for stats
-        """
-        # Test with no data
-        response = self.client.get("/stats/")
-        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
-        j = json.loads(response.content)
-
-        # All lists should be empty
-        self.assertEqual(
-            len(j["top_viewed_avatars"]), 0, "top_viewed_avatars should be empty"
-        )
-        self.assertEqual(
-            len(j["top_queried_emails"]), 0, "top_queried_emails should be empty"
-        )
-        self.assertEqual(
-            len(j["top_queried_openids"]), 0, "top_queried_openids should be empty"
-        )
-        self.assertEqual(
-            len(j["photo_format_distribution"]),
-            0,
-            "photo_format_distribution should be empty",
-        )
-        self.assertEqual(
-            j["bluesky_handles"]["total_bluesky_handles"],
-            0,
-            "bluesky_handles should be 0",
-        )
-        self.assertEqual(
-            j["photo_size_stats"]["total_photos_analyzed"],
-            0,
-            "photo_size_stats should be 0",
-        )
-        self.assertEqual(
-            j["potential_duplicate_photos"]["potential_duplicate_groups"],
-            0,
-            "potential_duplicate_photos should be 0",
-        )
-
-    def test_stats_with_bluesky_handles(self):
-        """
-        Test stats with Bluesky handles
-        """
-        from ivatar.ivataraccount.models import ConfirmedEmail, ConfirmedOpenId
-
-        # Create email with Bluesky handle
-        email = ConfirmedEmail.objects.create(
-            user=self.user, email="bluesky@example.com", ip_address="192.168.1.1"
-        )
-        email.bluesky_handle = "test.bsky.social"
-        email.access_count = 100
-        email.save()
-
-        # Create OpenID with Bluesky handle
-        openid = ConfirmedOpenId.objects.create(
-            user=self.user,
-            openid="http://bluesky.example.com/",
-            ip_address="192.168.1.2",
-        )
-        openid.bluesky_handle = "another.bsky.social"
-        openid.access_count = 50
-        openid.save()
-
-        response = self.client.get("/stats/")
-        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
-        j = json.loads(response.content)
-
-        # Test Bluesky handles stats
-        self.assertEqual(
-            j["bluesky_handles"]["total_bluesky_handles"],
-            2,
-            "total bluesky handles incorrect",
-        )
-        self.assertEqual(
-            j["bluesky_handles"]["bluesky_emails"], 1, "bluesky emails count incorrect"
-        )
-        self.assertEqual(
-            j["bluesky_handles"]["bluesky_openids"],
-            1,
-            "bluesky openids count incorrect",
-        )
-        self.assertEqual(
-            len(j["bluesky_handles"]["top_bluesky_handles"]),
-            2,
-            "top bluesky handles count incorrect",
-        )
-
-    def test_stats_photo_duplicates(self):
-        """
-        Test potential duplicate photos detection
-        """
-        from ivatar.ivataraccount.models import Photo
-
-        # Create photos with same format and size (potential duplicates)
-        # PNG header + minimal data
-        png_data = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00\nIDATx\x9cc```\x00\x00\x00\x04\x00\x01\xdd\x8d\xb4\x1c\x00\x00\x00\x00IEND\xaeB`\x82"
-
-        Photo.objects.create(
-            user=self.user, data=png_data, format="png", ip_address="192.168.1.1"
-        )
-        Photo.objects.create(
-            user=self.user,
-            data=png_data,  # Same size
-            format="png",  # Same format
-            ip_address="192.168.1.2",
-        )
-        Photo.objects.create(
-            user=self.user,
-            data=png_data,  # Same size but different format
-            format="png",  # Same format for testing
-            ip_address="192.168.1.3",
-        )
-
-        response = self.client.get("/stats/")
-        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
-        j = json.loads(response.content)
-
-        # Should detect potential duplicates
-        self.assertEqual(
-            j["potential_duplicate_photos"]["potential_duplicate_groups"],
-            1,
-            "should have 1 duplicate group",
-        )
-        self.assertEqual(
-            j["potential_duplicate_photos"]["total_potential_duplicate_photos"],
-            3,
-            "should have 3 potential duplicate photos",
-        )
-        self.assertEqual(
-            len(j["potential_duplicate_photos"]["potential_duplicate_groups_detail"]),
-            1,
-            "should have 1 duplicate group detail",
-        )
-
     def test_logout(self):
         """
         Test if logout works correctly
diff --git a/ivatar/test_views_stats.py b/ivatar/test_views_stats.py
new file mode 100644
index 0000000..b4237df
--- /dev/null
+++ b/ivatar/test_views_stats.py
@@ -0,0 +1,379 @@
+# -*- coding: utf-8 -*-
+"""
+Test our StatsView in ivatar.views
+"""
+
+import json
+import os
+import django
+from django.test import TestCase
+from django.test import Client
+from django.contrib.auth.models import User
+from ivatar.utils import random_string, random_ip_address
+
+os.environ["DJANGO_SETTINGS_MODULE"] = "ivatar.settings"
+django.setup()
+
+
+class StatsTester(TestCase):
+    """
+    Test class for StatsView
+    """
+
+    client = Client()
+    user = None
+    username = random_string()
+    password = random_string()
+
+    def login(self):
+        """
+        Login as user
+        """
+        self.client.login(username=self.username, password=self.password)
+
+    def setUp(self):
+        """
+        Prepare for tests.
+        - Create user
+        """
+        self.user = User.objects.create_user(
+            username=self.username,
+            password=self.password,
+        )
+
+    def test_stats_basic(self):
+        """
+        Test basic stats functionality
+        """
+        response = self.client.get("/stats/", follow=True)
+        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
+        j = json.loads(response.content)
+        self.assertEqual(j["users"], 1, "user count incorrect")
+        self.assertEqual(j["mails"], 0, "mails count incorrect")
+        self.assertEqual(j["openids"], 0, "openids count incorrect")
+        self.assertEqual(j["unconfirmed_mails"], 0, "unconfirmed mails count incorrect")
+        self.assertEqual(
+            j["unconfirmed_openids"], 0, "unconfirmed openids count incorrect"
+        )
+        self.assertEqual(j["avatars"], 0, "avatars count incorrect")
+
+    def test_stats_comprehensive(self):
+        """
+        Test comprehensive stats with actual data
+        """
+        from ivatar.ivataraccount.models import (
+            ConfirmedEmail,
+            ConfirmedOpenId,
+            Photo,
+            UnconfirmedEmail,
+            UnconfirmedOpenId,
+        )
+
+        # Create test data with random values
+        email1 = ConfirmedEmail.objects.create(
+            user=self.user,
+            email=f"{random_string()}@{random_string()}.{random_string(2)}",
+            ip_address=random_ip_address(),
+        )
+        email1.access_count = 100
+        email1.save()
+
+        email2 = ConfirmedEmail.objects.create(
+            user=self.user,
+            email=f"{random_string()}@{random_string()}.{random_string(2)}",
+            ip_address=random_ip_address(),
+        )
+        email2.access_count = 50
+        email2.save()
+
+        openid1 = ConfirmedOpenId.objects.create(
+            user=self.user,
+            openid=f"http://{random_string()}.{random_string()}.org/",
+            ip_address=random_ip_address(),
+        )
+        openid1.access_count = 75
+        openid1.save()
+
+        # Create photos with valid image data (minimal PNG)
+        # PNG header + minimal data
+        png_data = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00\nIDATx\x9cc```\x00\x00\x00\x04\x00\x01\xdd\x8d\xb4\x1c\x00\x00\x00\x00IEND\xaeB`\x82"
+
+        photo1 = Photo.objects.create(
+            user=self.user, data=png_data, format="png", ip_address=random_ip_address()
+        )
+        photo1.access_count = 200
+        photo1.save()
+
+        photo2 = Photo.objects.create(
+            user=self.user,
+            data=png_data,  # Same data for testing
+            format="png",  # Same format for testing
+            ip_address=random_ip_address(),
+        )
+        photo2.access_count = 150
+        photo2.save()
+
+        # Associate photos with emails/openids
+        email1.photo = photo1
+        email1.save()
+        email2.photo = photo2
+        email2.save()
+        openid1.photo = photo1
+        openid1.save()
+
+        # Create unconfirmed entries
+        UnconfirmedEmail.objects.create(
+            user=self.user,
+            email=f"{random_string()}@{random_string()}.{random_string(2)}",
+            ip_address=random_ip_address(),
+        )
+
+        UnconfirmedOpenId.objects.create(
+            user=self.user,
+            openid=f"http://{random_string()}.{random_string()}.org/",
+            ip_address=random_ip_address(),
+        )
+
+        # Test the stats endpoint
+        response = self.client.get("/stats/")
+        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
+        j = json.loads(response.content)
+
+        # Test basic counts
+        self.assertEqual(j["users"], 1, "user count incorrect")
+        self.assertEqual(j["mails"], 2, "mails count incorrect")
+        self.assertEqual(j["openids"], 1, "openids count incorrect")
+        self.assertEqual(j["unconfirmed_mails"], 1, "unconfirmed mails count incorrect")
+        self.assertEqual(
+            j["unconfirmed_openids"], 1, "unconfirmed openids count incorrect"
+        )
+        self.assertEqual(j["avatars"], 2, "avatars count incorrect")
+
+        # Test top viewed avatars
+        self.assertIn("top_viewed_avatars", j, "top_viewed_avatars missing")
+        self.assertEqual(
+            len(j["top_viewed_avatars"]), 2, "should have 2 top viewed avatars"
+        )
+        # The top viewed avatar should be the one with highest associated email/openid access count
+        self.assertEqual(
+            j["top_viewed_avatars"][0]["access_count"],
+            100,
+            "top avatar access count incorrect",
+        )
+
+        # Test top queried emails
+        self.assertIn("top_queried_emails", j, "top_queried_emails missing")
+        self.assertEqual(
+            len(j["top_queried_emails"]), 2, "should have 2 top queried emails"
+        )
+        self.assertEqual(
+            j["top_queried_emails"][0]["access_count"],
+            100,
+            "top email access count incorrect",
+        )
+
+        # Test top queried openids
+        self.assertIn("top_queried_openids", j, "top_queried_openids missing")
+        self.assertEqual(
+            len(j["top_queried_openids"]), 1, "should have 1 top queried openid"
+        )
+        self.assertEqual(
+            j["top_queried_openids"][0]["access_count"],
+            75,
+            "top openid access count incorrect",
+        )
+
+        # Test photo format distribution
+        self.assertIn(
+            "photo_format_distribution", j, "photo_format_distribution missing"
+        )
+        formats = {
+            item["format"]: item["count"] for item in j["photo_format_distribution"]
+        }
+        self.assertEqual(formats["png"], 2, "png format count incorrect")
+
+        # Test user activity stats
+        self.assertIn("user_activity", j, "user_activity missing")
+        self.assertEqual(
+            j["user_activity"]["users_with_multiple_photos"],
+            1,
+            "users with multiple photos incorrect",
+        )
+        self.assertEqual(
+            j["user_activity"]["users_with_both_email_and_openid"],
+            1,
+            "users with both email and openid incorrect",
+        )
+        self.assertEqual(
+            j["user_activity"]["average_photos_per_user"],
+            2.0,
+            "average photos per user incorrect",
+        )
+
+        # Test Bluesky handles (should be empty)
+        self.assertIn("bluesky_handles", j, "bluesky_handles missing")
+        self.assertEqual(
+            j["bluesky_handles"]["total_bluesky_handles"],
+            0,
+            "total bluesky handles should be 0",
+        )
+
+        # Test photo size stats
+        self.assertIn("photo_size_stats", j, "photo_size_stats missing")
+        self.assertGreater(
+            j["photo_size_stats"]["average_size_bytes"],
+            0,
+            "average photo size should be > 0",
+        )
+        self.assertEqual(
+            j["photo_size_stats"]["total_photos_analyzed"],
+            2,
+            "total photos analyzed incorrect",
+        )
+
+        # Test potential duplicate photos
+        self.assertIn(
+            "potential_duplicate_photos", j, "potential_duplicate_photos missing"
+        )
+        self.assertEqual(
+            j["potential_duplicate_photos"]["potential_duplicate_groups"],
+            1,
+            "should have 1 duplicate group (same PNG data)",
+        )
+
+    def test_stats_edge_cases(self):
+        """
+        Test edge cases for stats
+        """
+        # Test with no data
+        response = self.client.get("/stats/")
+        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
+        j = json.loads(response.content)
+
+        # All lists should be empty
+        self.assertEqual(
+            len(j["top_viewed_avatars"]), 0, "top_viewed_avatars should be empty"
+        )
+        self.assertEqual(
+            len(j["top_queried_emails"]), 0, "top_queried_emails should be empty"
+        )
+        self.assertEqual(
+            len(j["top_queried_openids"]), 0, "top_queried_openids should be empty"
+        )
+        self.assertEqual(
+            len(j["photo_format_distribution"]),
+            0,
+            "photo_format_distribution should be empty",
+        )
+        self.assertEqual(
+            j["bluesky_handles"]["total_bluesky_handles"],
+            0,
+            "bluesky_handles should be 0",
+        )
+        self.assertEqual(
+            j["photo_size_stats"]["total_photos_analyzed"],
+            0,
+            "photo_size_stats should be 0",
+        )
+        self.assertEqual(
+            j["potential_duplicate_photos"]["potential_duplicate_groups"],
+            0,
+            "potential_duplicate_photos should be 0",
+        )
+
+    def test_stats_with_bluesky_handles(self):
+        """
+        Test stats with Bluesky handles
+        """
+        from ivatar.ivataraccount.models import ConfirmedEmail, ConfirmedOpenId
+
+        # Create email with Bluesky handle
+        email = ConfirmedEmail.objects.create(
+            user=self.user,
+            email=f"{random_string()}@{random_string()}.{random_string(2)}",
+            ip_address=random_ip_address(),
+        )
+        email.bluesky_handle = f"{random_string()}.bsky.social"
+        email.access_count = 100
+        email.save()
+
+        # Create OpenID with Bluesky handle
+        openid = ConfirmedOpenId.objects.create(
+            user=self.user,
+            openid=f"http://{random_string()}.{random_string()}.org/",
+            ip_address=random_ip_address(),
+        )
+        openid.bluesky_handle = f"{random_string()}.bsky.social"
+        openid.access_count = 50
+        openid.save()
+
+        response = self.client.get("/stats/")
+        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
+        j = json.loads(response.content)
+
+        # Test Bluesky handles stats
+        self.assertEqual(
+            j["bluesky_handles"]["total_bluesky_handles"],
+            2,
+            "total bluesky handles incorrect",
+        )
+        self.assertEqual(
+            j["bluesky_handles"]["bluesky_emails"], 1, "bluesky emails count incorrect"
+        )
+        self.assertEqual(
+            j["bluesky_handles"]["bluesky_openids"],
+            1,
+            "bluesky openids count incorrect",
+        )
+        self.assertEqual(
+            len(j["bluesky_handles"]["top_bluesky_handles"]),
+            2,
+            "top bluesky handles count incorrect",
+        )
+
+    def test_stats_photo_duplicates(self):
+        """
+        Test potential duplicate photos detection
+        """
+        from ivatar.ivataraccount.models import Photo
+
+        # Create photos with same format and size (potential duplicates)
+        # PNG header + minimal data
+        png_data = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00\nIDATx\x9cc```\x00\x00\x00\x04\x00\x01\xdd\x8d\xb4\x1c\x00\x00\x00\x00IEND\xaeB`\x82"
+
+        Photo.objects.create(
+            user=self.user, data=png_data, format="png", ip_address=random_ip_address()
+        )
+        Photo.objects.create(
+            user=self.user,
+            data=png_data,  # Same size
+            format="png",  # Same format
+            ip_address=random_ip_address(),
+        )
+        Photo.objects.create(
+            user=self.user,
+            data=png_data,  # Same size
+            format="png",  # Same format for testing
+            ip_address=random_ip_address(),
+        )
+
+        response = self.client.get("/stats/")
+        self.assertEqual(response.status_code, 200, "unable to fetch stats!")
+        j = json.loads(response.content)
+
+        # Should detect potential duplicates
+        self.assertEqual(
+            j["potential_duplicate_photos"]["potential_duplicate_groups"],
+            1,
+            "should have 1 duplicate group",
+        )
+        self.assertEqual(
+            j["potential_duplicate_photos"]["total_potential_duplicate_photos"],
+            3,
+            "should have 3 potential duplicate photos",
+        )
+        self.assertEqual(
+            len(j["potential_duplicate_photos"]["potential_duplicate_groups_detail"]),
+            1,
+            "should have 1 duplicate group detail",
+        )
diff --git a/ivatar/utils.py b/ivatar/utils.py
index 3e50824..3df96bf 100644
--- a/ivatar/utils.py
+++ b/ivatar/utils.py
@@ -111,6 +111,13 @@ def random_string(length=10):
     )
 
 
+def random_ip_address():
+    """
+    Return a random IP address (IPv4)
+    """
+    return f"{random.randint(1, 254)}.{random.randint(1, 254)}.{random.randint(1, 254)}.{random.randint(1, 254)}"
+
+
 def openid_variations(openid):
     """
     Return the various OpenID variations, ALWAYS in the same order:

From 213e0cb2138c2feffc23af835ea05ff13809b20e Mon Sep 17 00:00:00 2001
From: Oliver Falk <oliver@linux-kernel.at>
Date: Tue, 23 Sep 2025 16:57:20 +0200
Subject: [PATCH 4/9] Remove privacy-sensitive data from stats JSON response

- Remove email addresses from top_viewed_avatars and top_queried_emails
- Remove OpenID URLs from top_viewed_avatars and top_queried_openids
- Remove Bluesky handles from bluesky_handles section
- Keep only access_count and digest_sha256 for privacy protection
- Update tests to reflect privacy changes
- All 5 stats tests pass successfully

This ensures that the stats endpoint no longer exposes:
- User email addresses
- OpenID URLs
- Bluesky handles
- Any other personally identifiable information

The stats now only show aggregated counts and hashed identifiers.
---
 ivatar/test_views_stats.py |  5 -----
 ivatar/views.py            | 36 ------------------------------------
 2 files changed, 41 deletions(-)

diff --git a/ivatar/test_views_stats.py b/ivatar/test_views_stats.py
index b4237df..309db08 100644
--- a/ivatar/test_views_stats.py
+++ b/ivatar/test_views_stats.py
@@ -325,11 +325,6 @@ class StatsTester(TestCase):
             1,
             "bluesky openids count incorrect",
         )
-        self.assertEqual(
-            len(j["bluesky_handles"]["top_bluesky_handles"]),
-            2,
-            "top bluesky handles count incorrect",
-        )
 
     def test_stats_photo_duplicates(self):
         """
diff --git a/ivatar/views.py b/ivatar/views.py
index 39f7478..997a824 100644
--- a/ivatar/views.py
+++ b/ivatar/views.py
@@ -588,7 +588,6 @@ class StatsView(TemplateView, JsonResponse):
                     # It's a ConfirmedEmail
                     top_photos_data.append(
                         {
-                            "email": top_associated.email,
                             "access_count": top_associated.access_count,
                             "digest_sha256": top_associated.digest_sha256,
                         }
@@ -597,7 +596,6 @@ class StatsView(TemplateView, JsonResponse):
                     # It's a ConfirmedOpenId
                     top_photos_data.append(
                         {
-                            "openid": top_associated.openid,
                             "access_count": top_associated.access_count,
                             "digest_sha256": top_associated.digest,
                         }
@@ -611,7 +609,6 @@ class StatsView(TemplateView, JsonResponse):
         for email in top_emails:
             top_emails_data.append(
                 {
-                    "email": email.email,
                     "access_count": email.access_count,
                     "digest_sha256": email.digest_sha256,
                 }
@@ -625,7 +622,6 @@ class StatsView(TemplateView, JsonResponse):
         for openid in top_openids:
             top_openids_data.append(
                 {
-                    "openid": openid.openid,
                     "access_count": openid.access_count,
                     "digest_sha256": openid.digest,
                 }
@@ -678,42 +674,10 @@ class StatsView(TemplateView, JsonResponse):
         total_bluesky_handles = bluesky_emails + bluesky_openids
 
         # Top Bluesky handles by access count
-        top_bluesky_emails = ConfirmedEmail.objects.filter(
-            bluesky_handle__isnull=False
-        ).order_by("-access_count")[:5]
-
-        top_bluesky_openids = ConfirmedOpenId.objects.filter(
-            bluesky_handle__isnull=False
-        ).order_by("-access_count")[:5]
-
-        top_bluesky_handles = []
-        for email in top_bluesky_emails:
-            top_bluesky_handles.append(
-                {
-                    "handle": email.bluesky_handle,
-                    "access_count": email.access_count,
-                    "type": "email",
-                }
-            )
-
-        for openid in top_bluesky_openids:
-            top_bluesky_handles.append(
-                {
-                    "handle": openid.bluesky_handle,
-                    "access_count": openid.access_count,
-                    "type": "openid",
-                }
-            )
-
-        # Sort by access count and take top 5
-        top_bluesky_handles.sort(key=lambda x: x["access_count"], reverse=True)
-        top_bluesky_handles = top_bluesky_handles[:5]
-
         retval["bluesky_handles"] = {
             "total_bluesky_handles": total_bluesky_handles,
             "bluesky_emails": bluesky_emails,
             "bluesky_openids": bluesky_openids,
-            "top_bluesky_handles": top_bluesky_handles,
         }
 
         # Average photo size statistics using raw SQL

From 10255296d58a7bd73f26f5ad446f581db2afe5f8 Mon Sep 17 00:00:00 2001
From: Oliver Falk <oliver@linux-kernel.at>
Date: Wed, 24 Sep 2025 09:37:26 +0200
Subject: [PATCH 5/9] Fix SQLite AVG() type conversion in photo size stats

- Convert avg_size_bytes to float to handle SQLite returning string values
- Fixes TypeError: '>' not supported between instances of 'str' and 'int'
- Ensures photo size statistics work correctly across different database backends
- All 5 stats tests pass successfully

The issue occurred because SQLite's AVG() function can return string representations
of numbers in some cases, causing type comparison errors in tests.
---
 ivatar/views.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ivatar/views.py b/ivatar/views.py
index 997a824..45f6bf9 100644
--- a/ivatar/views.py
+++ b/ivatar/views.py
@@ -698,6 +698,8 @@ class StatsView(TemplateView, JsonResponse):
 
             if result and result[0] > 0:
                 photo_count, avg_size_bytes = result
+                # Convert to float in case SQLite returns string
+                avg_size_bytes = float(avg_size_bytes) if avg_size_bytes else 0
                 avg_size_kb = round(avg_size_bytes / 1024, 2) if avg_size_bytes else 0
                 avg_size_mb = (
                     round(avg_size_bytes / (1024 * 1024), 2) if avg_size_bytes else 0

From 44a738d72457114cb3e0955f9d7fc0bddd67ea05 Mon Sep 17 00:00:00 2001
From: Oliver Falk <oliver@linux-kernel.at>
Date: Wed, 24 Sep 2025 09:37:54 +0200
Subject: [PATCH 6/9] Fix code comment

---
 ivatar/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ivatar/views.py b/ivatar/views.py
index 45f6bf9..25cc3b3 100644
--- a/ivatar/views.py
+++ b/ivatar/views.py
@@ -698,7 +698,7 @@ class StatsView(TemplateView, JsonResponse):
 
             if result and result[0] > 0:
                 photo_count, avg_size_bytes = result
-                # Convert to float in case SQLite returns string
+                # Convert to float in case database returns string
                 avg_size_bytes = float(avg_size_bytes) if avg_size_bytes else 0
                 avg_size_kb = round(avg_size_bytes / 1024, 2) if avg_size_bytes else 0
                 avg_size_mb = (

From 2fbdd74619813b350f118f7915d348fdba617a16 Mon Sep 17 00:00:00 2001
From: Oliver Falk <oliver@linux-kernel.at>
Date: Wed, 24 Sep 2025 17:11:50 +0200
Subject: [PATCH 7/9] Use newer CI image, now that the new server also has
 newer Python

---
 .gitlab-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 221e37a..29fb90e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,5 +1,5 @@
 image:
-  name: quay.io/rhn_support_ofalk/fedora36-python3
+  name: git.linux-kernel.at:5050/oliver/fedora42-python3:latest
   entrypoint:
   - "/bin/sh"
   - "-c"

From 928ffaea76615dbbc4f274d98fa0d500bdd958a9 Mon Sep 17 00:00:00 2001
From: Oliver Falk <oliver@linux-kernel.at>
Date: Wed, 24 Sep 2025 17:27:43 +0200
Subject: [PATCH 8/9] Switch identicon to my fork until upstream is fixed

---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 8b3d9bd..0f8c139 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,9 +15,10 @@ fabric
 flake8-respect-noqa
 git+https://github.com/daboth/pagan.git
 git+https://github.com/ercpe/pydenticon5.git
-git+https://github.com/flavono123/identicon.git
 git+https://github.com/necaris/python3-openid.git
 git+https://github.com/ofalk/django-openid-auth
+#git+https://github.com/flavono123/identicon.git
+git+https://github.com/ofalk/identicon.git
 git+https://github.com/ofalk/monsterid.git
 git+https://github.com/ofalk/Robohash.git@devel
 notsetuptools

From 15062b3cda5985bed5b779e11550eb9eb8c18c0c Mon Sep 17 00:00:00 2001
From: Oliver Falk <oliver@linux-kernel.at>
Date: Fri, 26 Sep 2025 09:21:00 +0200
Subject: [PATCH 9/9] Return full avatar URLs instead of digests in stats

- Replace digest_sha256 with avatar_url in top_viewed_avatars
- Replace digest_sha256 with avatar_url in top_queried_emails
- Replace digest_sha256 with avatar_url in top_queried_openids
- All avatar URLs now use https://libravatar.org/avatar/{digest} format
- Update tests to verify avatar_url presence and correct format
- All 5 stats tests pass successfully

This makes the stats API more user-friendly by providing complete
avatar URLs that can be directly used in applications instead of
requiring clients to construct the URLs themselves.
---
 ivatar/test_views_stats.py | 24 ++++++++++++++++++++++++
 ivatar/views.py            |  8 ++++----
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/ivatar/test_views_stats.py b/ivatar/test_views_stats.py
index 309db08..4a951d1 100644
--- a/ivatar/test_views_stats.py
+++ b/ivatar/test_views_stats.py
@@ -160,6 +160,14 @@ class StatsTester(TestCase):
             100,
             "top avatar access count incorrect",
         )
+        # Check that avatar_url is present and starts with the correct base URL
+        self.assertIn("avatar_url", j["top_viewed_avatars"][0], "avatar_url missing")
+        self.assertTrue(
+            j["top_viewed_avatars"][0]["avatar_url"].startswith(
+                "https://libravatar.org/avatar/"
+            ),
+            "avatar_url should start with https://libravatar.org/avatar/",
+        )
 
         # Test top queried emails
         self.assertIn("top_queried_emails", j, "top_queried_emails missing")
@@ -171,6 +179,14 @@ class StatsTester(TestCase):
             100,
             "top email access count incorrect",
         )
+        # Check that avatar_url is present and starts with the correct base URL
+        self.assertIn("avatar_url", j["top_queried_emails"][0], "avatar_url missing")
+        self.assertTrue(
+            j["top_queried_emails"][0]["avatar_url"].startswith(
+                "https://libravatar.org/avatar/"
+            ),
+            "avatar_url should start with https://libravatar.org/avatar/",
+        )
 
         # Test top queried openids
         self.assertIn("top_queried_openids", j, "top_queried_openids missing")
@@ -182,6 +198,14 @@ class StatsTester(TestCase):
             75,
             "top openid access count incorrect",
         )
+        # Check that avatar_url is present and starts with the correct base URL
+        self.assertIn("avatar_url", j["top_queried_openids"][0], "avatar_url missing")
+        self.assertTrue(
+            j["top_queried_openids"][0]["avatar_url"].startswith(
+                "https://libravatar.org/avatar/"
+            ),
+            "avatar_url should start with https://libravatar.org/avatar/",
+        )
 
         # Test photo format distribution
         self.assertIn(
diff --git a/ivatar/views.py b/ivatar/views.py
index 25cc3b3..a0d43d9 100644
--- a/ivatar/views.py
+++ b/ivatar/views.py
@@ -589,7 +589,7 @@ class StatsView(TemplateView, JsonResponse):
                     top_photos_data.append(
                         {
                             "access_count": top_associated.access_count,
-                            "digest_sha256": top_associated.digest_sha256,
+                            "avatar_url": f"https://libravatar.org/avatar/{top_associated.digest_sha256}",
                         }
                     )
                 else:
@@ -597,7 +597,7 @@ class StatsView(TemplateView, JsonResponse):
                     top_photos_data.append(
                         {
                             "access_count": top_associated.access_count,
-                            "digest_sha256": top_associated.digest,
+                            "avatar_url": f"https://libravatar.org/avatar/{top_associated.digest}",
                         }
                     )
 
@@ -610,7 +610,7 @@ class StatsView(TemplateView, JsonResponse):
             top_emails_data.append(
                 {
                     "access_count": email.access_count,
-                    "digest_sha256": email.digest_sha256,
+                    "avatar_url": f"https://libravatar.org/avatar/{email.digest_sha256}",
                 }
             )
 
@@ -623,7 +623,7 @@ class StatsView(TemplateView, JsonResponse):
             top_openids_data.append(
                 {
                     "access_count": openid.access_count,
-                    "digest_sha256": openid.digest,
+                    "avatar_url": f"https://libravatar.org/avatar/{openid.digest}",
                 }
             )