Coverage for app / backend / src / couchers / metrics.py: 100%
105 statements
« prev ^ index » next coverage.py v7.13.2, created at 2026-02-03 06:18 +0000
« prev ^ index » next coverage.py v7.13.2, created at 2026-02-03 06:18 +0000
1import threading
2from collections.abc import Callable
3from datetime import timedelta
4from typing import Any
6from opentelemetry import trace
7from prometheus_client import (
8 CONTENT_TYPE_LATEST,
9 CollectorRegistry,
10 Counter,
11 Gauge,
12 Histogram,
13 exposition,
14 generate_latest,
15 multiprocess,
16)
17from prometheus_client.registry import CollectorRegistry
18from sqlalchemy import select
19from sqlalchemy.sql import distinct, func
20from sqlalchemy.sql.selectable import Select
22from couchers.db import session_scope
23from couchers.helpers.completed_profile import has_completed_profile_expression
24from couchers.models import BackgroundJob, EventOccurrenceAttendee, HostingStatus, HostRequest, Message, Reference, User
25from couchers.models.moderation import (
26 ModerationAction,
27 ModerationObjectType,
28 ModerationQueueItem,
29 ModerationState,
30 ModerationTrigger,
31 ModerationVisibility,
32)
# Tracer used to wrap every query-backed gauge evaluation in its own span
tracer = trace.get_tracer(__name__)

# Registry that is serialised by the metrics endpoint; it is fed through the multiprocess collector
registry: CollectorRegistry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)  # type: ignore[no-untyped-call]

# Explicit upper bound used as the last entry of hand-written histogram bucket tuples
_INF: float = float("inf")
# Durations of background jobs, labelled by job, status, attempt and exception (default buckets)
jobs_duration_histogram: Histogram = Histogram(
    "couchers_background_jobs_seconds",
    "Durations of background jobs",
    labelnames=["job", "status", "attempt", "exception"],
)
def observe_in_jobs_duration_histogram(
    job_type: str, job_state: str, try_count: int, exception_name: str, duration_s: float
) -> None:
    """
    Records one observation of `duration_s` in the background job duration histogram

    The label values are, in order: job_type, job_state, try_count (converted to a string) and exception_name
    """
    labelled_histogram = jobs_duration_histogram.labels(job_type, job_state, str(try_count), exception_name)
    labelled_histogram.observe(duration_s)
# Time a background job spent queued before being picked up; unlabelled, buckets from 10 ms up to 1 hour
jobs_queued_histogram: Histogram = Histogram(
    "couchers_background_jobs_queued_seconds",
    "Time background job spent queued before being picked up",
    buckets=(
        0.01,
        0.05,
        0.1,
        0.5,
        1.0,
        2.5,
        5.0,
        10,
        20,
        30,
        40,
        50,
        60,
        90,
        120,
        300,
        600,
        1800,
        3600,
        _INF,
    ),
)
# Durations of processing gRPC calls, labelled by method, logged-in flag, status code and exception
servicer_duration_histogram: Histogram = Histogram(
    "couchers_servicer_duration_seconds",
    "Durations of processing gRPC calls",
    labelnames=["method", "logged_in", "code", "exception"],
)
def observe_in_servicer_duration_histogram(
    method: str, user_id: Any, status_code: str, exception_type: str, duration_s: float
) -> None:
    """
    Records one observation of `duration_s` in the gRPC servicer duration histogram

    The `logged_in` label is derived from `user_id`: it is true whenever `user_id` is not None
    """
    logged_in = user_id is not None
    labelled_histogram = servicer_duration_histogram.labels(method, logged_in, status_code, exception_type)
    labelled_histogram.observe(duration_s)
# list of (gauge, function) pairs, where calling the function produces the value the gauge should be set to
# Gauge.set_function cannot be used here (NOTE(review): presumably because of multiprocess mode, confirm against the
# prometheus_client docs), so instead we hack around it and set each of these gauges just before collection
_set_hacky_gauges_funcs: list[tuple[Gauge, Callable[[], Any]]] = []
def _make_gauge_from_query(name: str, description: str, statement: Select[Any]) -> Gauge:
    """
    Creates a gauge whose value is obtained by executing a sqlalchemy statement

    The new gauge is appended to `_set_hacky_gauges_funcs` together with a function that runs `statement` in a fresh
    session (inside a tracing span called `metric.<name>`) and returns its scalar result.

    statement should be a sqlalchemy SELECT statement that returns a single number
    """

    def run_query() -> Any:
        # scalar_one() expects the statement to produce exactly one row
        with tracer.start_as_current_span(f"metric.{name}"), session_scope() as session:
            return session.execute(statement).scalar_one()

    query_gauge = Gauge(name, description, multiprocess_mode="mostrecent")
    _set_hacky_gauges_funcs.append((query_gauge, run_query))
    return query_gauge
# One gauge per time window: visible users whose last_active falls within the window ending now
active_users_gauges: list[Gauge] = [
    _make_gauge_from_query(
        f"couchers_active_users_{suffix}",
        f"Number of active users in the last {label}",
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.last_active > func.now() - window),
    )
    for suffix, label, window in [
        ("5m", "5 min", timedelta(minutes=5)),
        ("24h", "24 hours", timedelta(hours=24)),
        ("1month", "1 month", timedelta(days=31)),
        ("3month", "3 months", timedelta(days=92)),
        ("6month", "6 months", timedelta(days=183)),
        ("12month", "12 months", timedelta(days=365)),
    ]
]
# Count of all visible users
users_gauge: Gauge = _make_gauge_from_query(
    "couchers_users",
    "Total number of users",
    select(func.count()).select_from(User).where(User.is_visible),
)

# Counts of visible users per gender value
man_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_man",
    "Total number of users with gender 'Man'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.gender == "Man"),
)

woman_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_woman",
    "Total number of users with gender 'Woman'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.gender == "Woman"),
)

nonbinary_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_nonbinary",
    "Total number of users with gender 'Non-binary'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.gender == "Non-binary"),
)
# Counts of visible users per hosting status
can_host_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_can_host",
    "Total number of users with hosting status 'can_host'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.hosting_status == HostingStatus.can_host),
)

cant_host_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_cant_host",
    "Total number of users with hosting status 'cant_host'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.hosting_status == HostingStatus.cant_host),
)

maybe_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_maybe",
    "Total number of users with hosting status 'maybe'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.hosting_status == HostingStatus.maybe),
)
# Count of visible users matching the completed-profile expression
completed_profile_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_completed_profile",
    "Total number of users with a completed profile",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(has_completed_profile_expression()),
)

# Count of visible users for which has_completed_my_home holds
completed_my_home_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_completed_my_home",
    "Total number of users with a completed my home section",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.has_completed_my_home),
)
# Each gauge below counts distinct user ids appearing in some table, restricted to visible users via a join on User

# Distinct authors of messages
sent_message_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_sent_message",
    "Total number of users who have sent a message",
    select(func.count(distinct(Message.author_id)))
    .join(User, User.id == Message.author_id)
    .where(User.is_visible),
)

# Distinct surfers of host requests
sent_request_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_sent_request",
    "Total number of users who have sent a host request",
    select(func.count(distinct(HostRequest.surfer_user_id)))
    .join(User, User.id == HostRequest.surfer_user_id)
    .where(User.is_visible),
)

# Distinct recipients of references
has_reference_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_has_reference",
    "Total number of users who have a reference",
    select(func.count(distinct(Reference.to_user_id)))
    .join(User, User.id == Reference.to_user_id)
    .where(User.is_visible),
)

# Distinct attendees of event occurrences
rsvpd_to_event_gauge: Gauge = _make_gauge_from_query(
    "couchers_users_rsvpd_to_event",
    "Total number of users who have RSVPd to an event",
    select(func.count(distinct(EventOccurrenceAttendee.user_id)))
    .join(User, User.id == EventOccurrenceAttendee.user_id)
    .where(User.is_visible),
)
# Count of background jobs for which ready_for_retry holds
background_jobs_ready_to_execute_gauge: Gauge = _make_gauge_from_query(
    "couchers_background_jobs_ready_to_execute",
    "Total number of background jobs ready to execute",
    select(func.count())
    .select_from(BackgroundJob)
    .where(BackgroundJob.ready_for_retry),
)

# Unlabelled counters describing what happens when a bg worker tries to grab a job
background_jobs_serialization_errors_counter: Counter = Counter(
    "couchers_background_jobs_serialization_errors_total",
    "Number of times a bg worker has a serialization error",
)

background_jobs_no_jobs_counter: Counter = Counter(
    "couchers_background_jobs_no_jobs_total",
    "Number of times a bg worker tries to grab a job but there is none",
)

background_jobs_got_job_counter: Counter = Counter(
    "couchers_background_jobs_got_job_total",
    "Number of times a bg worker grabbed a job",
)
# Signup funnel: initiations are unlabelled, completions and signup time carry a gender label
signup_initiations_counter: Counter = Counter(
    "couchers_signup_initiations_total",
    "Number of initiated signups",
)
signup_completions_counter: Counter = Counter(
    "couchers_signup_completions_total",
    "Number of completed signups",
    labelnames=["gender"],
)
# Buckets run from 30 seconds up to 2 hours
signup_time_histogram: Histogram = Histogram(
    "couchers_signup_time_seconds",
    "Time taken for a user to sign up",
    labelnames=["gender"],
    buckets=(
        30,
        60,
        90,
        120,
        180,
        240,
        300,
        360,
        420,
        480,
        540,
        600,
        900,
        1200,
        1800,
        3600,
        7200,
        _INF,
    ),
)
# Logins, labelled by gender
logins_counter: Counter = Counter(
    "couchers_logins_total",
    "Number of logins",
    labelnames=["gender"],
)

# Password reset flow, both counters unlabelled
password_reset_initiations_counter: Counter = Counter(
    "couchers_password_reset_initiations_total",
    "Number of password reset initiations",
)
password_reset_completions_counter: Counter = Counter(
    "couchers_password_reset_completions_total",
    "Number of password reset completions",
)

# Account deletion and recovery, all labelled by gender
account_deletion_initiations_counter: Counter = Counter(
    "couchers_account_deletion_initiations_total",
    "Number of account deletion initiations",
    labelnames=["gender"],
)
account_deletion_completions_counter: Counter = Counter(
    "couchers_account_deletion_completions_total",
    "Number of account deletion completions",
    labelnames=["gender"],
)
account_recoveries_counter: Counter = Counter(
    "couchers_account_recoveries_total",
    "Number of account recoveries",
    labelnames=["gender"],
)
# Strong verification flow
strong_verification_initiations_counter: Counter = Counter(
    "couchers_strong_verification_initiations_total",
    "Number of strong verification initiations",
    labelnames=["gender"],
)
# NOTE: unlike the initiations and data deletions counters, completions carry no gender label
strong_verification_completions_counter: Counter = Counter(
    "couchers_strong_verification_completions_total",
    "Number of strong verification completions",
)
strong_verification_data_deletions_counter: Counter = Counter(
    "couchers_strong_verification_data_deletions_total",
    "Number of strong verification data deletions",
    labelnames=["gender"],
)
# Host requests sent, labelled by the genders on both sides
host_requests_sent_counter: Counter = Counter(
    "couchers_host_requests_total",
    "Number of host requests sent",
    labelnames=["from_gender", "to_gender"],
)
# Responses to host requests, labelled by both genders and the type of response
host_request_responses_counter: Counter = Counter(
    "couchers_host_requests_responses_total",
    "Number of responses to host requests",
    labelnames=["responder_gender", "other_gender", "response_type"],
)

# Messages sent, labelled by gender and message type
sent_messages_counter: Counter = Counter(
    "couchers_sent_messages_total",
    "Number of messages sent",
    labelnames=["gender", "message_type"],
)
# Push notification delivery attempts, labelled by platform and outcome
push_notification_counter: Counter = Counter(
    "couchers_push_notification_total",
    "Number of push notification delivery attempts",
    labelnames=["platform", "outcome"],
)
# Emails sent, unlabelled
emails_counter: Counter = Counter(
    "couchers_emails_total",
    "Number of emails sent",
)
# Recaptcha assessments created, labelled by action
recaptchas_assessed_counter: Counter = Counter(
    "couchers_recaptchas_assessed_total",
    "Number of times a recaptcha assessment is created",
    labelnames=["action"],
)

# Recaptcha scores, labelled by action; 21 evenly spaced buckets: 0.0, 0.05, ..., 1.0
recaptcha_score_histogram: Histogram = Histogram(
    "couchers_recaptcha_score",
    "Score of recaptcha assessments",
    labelnames=["action"],
    buckets=tuple(step / 20 for step in range(0, 21)),
)
# Response time to host requests, labelled by host gender, surfer gender and response type
# Bucket boundaries are in seconds; the week-based boundaries are multiples of 604_800 (= 7 * 86_400)
host_request_first_response_histogram: Histogram = Histogram(
    "couchers_host_request_first_response_seconds",
    "Response time to host requests",
    labelnames=["host_gender", "surfer_gender", "response_type"],
    buckets=(
        1 * 60,  # 1m
        2 * 60,  # 2m
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        5 * 86_400,  # 5d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        _INF,
    ),
)
# Age of the account sending a host request, labelled by surfer gender and host gender
# Bucket boundaries are in seconds; the week-based boundaries are multiples of 604_800 (= 7 * 86_400)
account_age_on_host_request_create_histogram: Histogram = Histogram(
    "couchers_account_age_on_host_request_create_histogram_seconds",
    "Age of account sending a host request",
    labelnames=["surfer_gender", "host_gender"],
    buckets=(
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        4 * 86_400,  # 4d
        5 * 86_400,  # 5d
        6 * 86_400,  # 6d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        5 * 604_800,  # 5w
        10 * 604_800,  # 10w
        25 * 604_800,  # 25w
        52 * 604_800,  # 52w
        104 * 604_800,  # 104w
        _INF,
    ),
)
# =============================================================================
# Moderation metrics
# =============================================================================

# Gauges: Queue lengths
# An item counts as unresolved while its resolved_by_log_id is NULL
moderation_queue_length_gauge: Gauge = _make_gauge_from_query(
    "couchers_moderation_queue_length",
    "Total number of unresolved items in the moderation queue",
    select(func.count())
    .select_from(ModerationQueueItem)
    .where(ModerationQueueItem.resolved_by_log_id.is_(None)),
)
# One gauge per ModerationTrigger member: unresolved queue items with that trigger
# NOTE(review): the per-object-type gauges use the same "couchers_moderation_queue_length_" name prefix, so this
# presumably relies on trigger names and object type names never coinciding -- confirm against the enums
moderation_queue_length_by_trigger_gauges: list[Gauge] = [
    _make_gauge_from_query(
        f"couchers_moderation_queue_length_{queue_trigger.name.lower()}",
        f"Number of unresolved items in the moderation queue with trigger {queue_trigger.name}",
        select(func.count())
        .select_from(ModerationQueueItem)
        .where(ModerationQueueItem.resolved_by_log_id.is_(None))
        .where(ModerationQueueItem.trigger == queue_trigger),
    )
    for queue_trigger in ModerationTrigger
]
# One gauge per ModerationObjectType member: unresolved queue items whose moderation state has that object type
# NOTE(review): the per-trigger gauges use the same "couchers_moderation_queue_length_" name prefix, so this
# presumably relies on object type names and trigger names never coinciding -- confirm against the enums
moderation_queue_length_by_object_type_gauges: list[Gauge] = [
    _make_gauge_from_query(
        f"couchers_moderation_queue_length_{kind.name.lower()}",
        f"Number of unresolved items in the moderation queue for {kind.name}",
        select(func.count())
        .select_from(ModerationQueueItem)
        .join(ModerationState, ModerationQueueItem.moderation_state_id == ModerationState.id)
        .where(ModerationQueueItem.resolved_by_log_id.is_(None))
        .where(ModerationState.object_type == kind),
    )
    for kind in ModerationObjectType
]
# Gauges: Items in each visibility state by object type
# One gauge for every (object type, visibility) combination
moderation_visibility_gauges: list[Gauge] = [
    _make_gauge_from_query(
        f"couchers_moderation_items_{kind.name.lower()}_{state_visibility.name.lower()}",
        f"Number of {kind.name} items with visibility {state_visibility.name}",
        select(func.count())
        .select_from(ModerationState)
        .where(ModerationState.object_type == kind)
        .where(ModerationState.visibility == state_visibility),
    )
    for kind in ModerationObjectType
    for state_visibility in ModerationVisibility
]
# Counters: Moderation actions taken
# Labelled by action and object type
moderation_actions_counter: Counter = Counter(
    "couchers_moderation_actions_total",
    "Number of moderation actions taken",
    labelnames=["action", "object_type"],
)
def observe_moderation_action(action: ModerationAction, object_type: ModerationObjectType) -> None:
    """
    Increments the moderation actions counter, using the enum member names as label values
    """
    labelled_counter = moderation_actions_counter.labels(action.name, object_type.name)
    labelled_counter.inc()
# Counters: Visibility state transitions
# Labelled by the visibility before and after the transition, and by object type
moderation_visibility_transitions_counter: Counter = Counter(
    "couchers_moderation_visibility_transitions_total",
    "Number of visibility state transitions",
    labelnames=["from_visibility", "to_visibility", "object_type"],
)
def observe_moderation_visibility_transition(
    from_visibility: ModerationVisibility, to_visibility: ModerationVisibility, object_type: ModerationObjectType
) -> None:
    """
    Increments the visibility transitions counter, using the enum member names as label values
    """
    label_values = (from_visibility.name, to_visibility.name, object_type.name)
    moderation_visibility_transitions_counter.labels(*label_values).inc()
# Counters: Auto-approved items
# Unlabelled
moderation_auto_approved_counter: Counter = Counter(
    "couchers_moderation_auto_approved_total",
    "Number of items that were auto-approved",
)


# Counters: Queue items created
# Labelled by trigger and object type
moderation_queue_items_created_counter: Counter = Counter(
    "couchers_moderation_queue_items_created_total",
    "Number of moderation queue items created",
    labelnames=["trigger", "object_type"],
)
def observe_moderation_queue_item_created(trigger: ModerationTrigger, object_type: ModerationObjectType) -> None:
    """
    Increments the queue items created counter, using the enum member names as label values
    """
    labelled_counter = moderation_queue_items_created_counter.labels(trigger.name, object_type.name)
    labelled_counter.inc()
# Counters: Queue items resolved
# Labelled by trigger, action and object type
moderation_queue_items_resolved_counter: Counter = Counter(
    "couchers_moderation_queue_items_resolved_total",
    "Number of moderation queue items resolved",
    labelnames=["trigger", "action", "object_type"],
)
def observe_moderation_queue_item_resolved(
    trigger: ModerationTrigger, action: ModerationAction, object_type: ModerationObjectType
) -> None:
    """
    Increments the queue items resolved counter, using the enum member names as label values
    """
    label_values = (trigger.name, action.name, object_type.name)
    moderation_queue_items_resolved_counter.labels(*label_values).inc()
# Histogram: Time to resolve queue items
# Labelled by trigger, action and object type; bucket boundaries are in seconds
moderation_queue_resolution_time_histogram: Histogram = Histogram(
    "couchers_moderation_queue_resolution_seconds",
    "Time taken to resolve moderation queue items",
    labelnames=["trigger", "action", "object_type"],
    buckets=(
        0.1,
        0.25,
        0.5,
        1,
        2.5,
        5,
        10,
        30,
        60,  # 1m
        5 * 60,  # 5m
        15 * 60,  # 15m
        30 * 60,  # 30m
        3_600,  # 1h
        2 * 3_600,  # 2h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        7 * 86_400,  # 7d
        14 * 86_400,  # 14d
        30 * 86_400,  # 30d
        _INF,
    ),
)
def observe_moderation_queue_resolution_time(
    trigger: ModerationTrigger, action: ModerationAction, object_type: ModerationObjectType, duration_s: float
) -> None:
    """
    Records one observation of `duration_s` in the queue resolution time histogram

    The enum member names of trigger, action and object_type are used as label values
    """
    label_values = (trigger.name, action.name, object_type.name)
    moderation_queue_resolution_time_histogram.labels(*label_values).observe(duration_s)
def create_prometheus_server(port: int) -> Any:
    """
    Custom start method to fix the problem described in https://github.com/prometheus/client_python/issues/155

    Builds a threading WSGI server on `port` (with an empty host string) that serves the contents of `registry`,
    starts it in a daemon thread and returns the server object.
    """

    def metrics_app(environ: Any, start_response: Any) -> Any:
        # set hacky gauges: refresh every query-backed gauge right before the registry is serialised
        for query_gauge, compute_value in _set_hacky_gauges_funcs:
            query_gauge.set(compute_value())

        payload = generate_latest(registry)
        response_headers = [("Content-type", CONTENT_TYPE_LATEST), ("Content-Length", str(len(payload)))]
        start_response("200 OK", response_headers)
        return [payload]

    httpd = exposition.make_server(  # type: ignore[attr-defined]
        "", port, metrics_app, exposition.ThreadingWSGIServer, handler_class=exposition._SilentHandler
    )
    # daemon thread, so the metrics server does not keep the process alive on its own
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    return httpd