Coverage for src/couchers/metrics.py: 100%
83 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-12-07 17:25 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-12-07 17:25 +0000
1import threading
2from collections.abc import Callable
3from datetime import timedelta
4from typing import Any
6from opentelemetry import trace
7from prometheus_client import (
8 CONTENT_TYPE_LATEST,
9 CollectorRegistry,
10 Counter,
11 Gauge,
12 Histogram,
13 exposition,
14 generate_latest,
15 multiprocess,
16)
17from prometheus_client.registry import CollectorRegistry
18from sqlalchemy.sql import distinct, func
19from sqlalchemy.sql.selectable import Select
21from couchers.db import session_scope
22from couchers.models import BackgroundJob, EventOccurrenceAttendee, HostingStatus, HostRequest, Message, Reference, User
23from couchers.sql import couchers_select as select
# tracer used to wrap each query-backed gauge evaluation in its own span
tracer = trace.get_tracer(__name__)

# dedicated registry for this module; the multiprocess collector is attached to it so that this is the registry
# that gets serialized when metrics are scraped
registry: CollectorRegistry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)  # type: ignore[no-untyped-call]

# explicit final upper bound for the custom histogram bucket tuples below
_INF: float = float("inf")
# how long background jobs take to run, labelled by job type, state, attempt number and exception name
jobs_duration_histogram: Histogram = Histogram(
    name="couchers_background_jobs_seconds",
    documentation="Durations of background jobs",
    labelnames=["job", "status", "attempt", "exception"],
)
def observe_in_jobs_duration_histogram(
    job_type: str, job_state: str, try_count: int, exception_name: str, duration_s: float
) -> None:
    """
    Records one observation of duration_s in the background jobs duration histogram

    job_type, job_state, try_count and exception_name are passed, in that order, as the label values of the
    histogram; try_count is converted to a string first
    """
    label_values = (job_type, job_state, str(try_count), exception_name)
    jobs_duration_histogram.labels(*label_values).observe(duration_s)
# time a background job waited in the queue; custom buckets from 10 ms up to 1 hour
jobs_queued_histogram: Histogram = Histogram(
    name="couchers_background_jobs_queued_seconds",
    documentation="Time background job spent queued before being picked up",
    buckets=(
        0.01,
        0.05,
        0.1,
        0.5,
        1.0,
        2.5,
        5.0,
        10,
        20,
        30,
        40,
        50,
        60,
        90,
        120,
        300,
        600,
        1800,
        3600,
        _INF,
    ),
)

# how long gRPC calls take to process, labelled by method, login state, status code and exception
servicer_duration_histogram: Histogram = Histogram(
    name="couchers_servicer_duration_seconds",
    documentation="Durations of processing gRPC calls",
    labelnames=["method", "logged_in", "code", "exception"],
)
def observe_in_servicer_duration_histogram(
    method: str, user_id: Any, status_code: str, exception_type: str, duration_s: float
) -> None:
    """
    Records one observation of duration_s in the servicer duration histogram

    The user id itself is never used as a label value, only whether one was given (user_id is not None)
    """
    logged_in = user_id is not None
    series = servicer_duration_histogram.labels(method, logged_in, status_code, exception_type)
    series.observe(duration_s)
# list of (gauge, function) pairs, where calling the function returns the value the gauge should be set to
# the python prometheus client does not support Gauge.set_function in multiprocess mode, so instead we hack around it
# and set each gauge just before collection with this
_set_hacky_gauges_funcs: list[tuple[Gauge, Callable[[], Any]]] = []
def _make_gauge_from_query(name: str, description: str, statement: Select[Any]) -> Gauge:
    """
    Creates a gauge whose value comes from running the given sqlalchemy statement

    statement should be a sqlalchemy SELECT statement that returns a single number

    The gauge is not given a value here. Instead it is appended to _set_hacky_gauges_funcs together with a function
    that executes the statement inside a session_scope and a tracing span named "metric.<name>"
    """
    gauge = Gauge(name, description, multiprocess_mode="mostrecent")

    def run_query() -> Any:
        with tracer.start_as_current_span(f"metric.{name}"), session_scope() as session:
            return session.execute(statement).scalar_one()

    _set_hacky_gauges_funcs.append((gauge, run_query))
    return gauge
# one gauge per look-back window, each counting visible users whose last_active falls inside that window
active_users_gauges: list[Gauge] = [
    _make_gauge_from_query(
        name=f"couchers_active_users_{suffix}",
        description=f"Number of active users in the last {window_label}",
        statement=(
            select(func.count())
            .select_from(User)
            .where(User.is_visible)
            .where(User.last_active > func.now() - window)
        ),
    )
    for suffix, window_label, window in (
        ("5m", "5 min", timedelta(minutes=5)),
        ("24h", "24 hours", timedelta(hours=24)),
        ("1month", "1 month", timedelta(days=31)),
        ("3month", "3 months", timedelta(days=92)),
        ("6month", "6 months", timedelta(days=183)),
        ("12month", "12 months", timedelta(days=365)),
    )
]
# count of all visible users
users_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users",
    description="Total number of users",
    statement=select(func.count()).select_from(User).where(User.is_visible),
)

# counts of visible users by the exact gender string stored on the user
man_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_man",
    description="Total number of users with gender 'Man'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Man")
    ),
)

woman_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_woman",
    description="Total number of users with gender 'Woman'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Woman")
    ),
)

nonbinary_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_nonbinary",
    description="Total number of users with gender 'Non-binary'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Non-binary")
    ),
)
# counts of visible users per hosting status
can_host_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_can_host",
    description="Total number of users with hosting status 'can_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.can_host)
    ),
)

cant_host_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_cant_host",
    description="Total number of users with hosting status 'cant_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.cant_host)
    ),
)

maybe_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_maybe",
    description="Total number of users with hosting status 'maybe'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.maybe)
    ),
)

# counts of visible users for which the respective completion flag on User holds
completed_profile_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_completed_profile",
    description="Total number of users with a completed profile",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.has_completed_profile)
    ),
)

completed_my_home_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_completed_my_home",
    description="Total number of users with a completed my home section",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.has_completed_my_home)
    ),
)
# the gauges below count distinct user ids in an activity table, joined to User so only visible users are counted
sent_message_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_sent_message",
    description="Total number of users who have sent a message",
    statement=(
        select(func.count(distinct(Message.author_id)))
        .join(User, User.id == Message.author_id)
        .where(User.is_visible)
    ),
)

sent_request_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_sent_request",
    description="Total number of users who have sent a host request",
    statement=(
        select(func.count(distinct(HostRequest.surfer_user_id)))
        .join(User, User.id == HostRequest.surfer_user_id)
        .where(User.is_visible)
    ),
)

has_reference_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_has_reference",
    description="Total number of users who have a reference",
    statement=(
        select(func.count(distinct(Reference.to_user_id)))
        .join(User, User.id == Reference.to_user_id)
        .where(User.is_visible)
    ),
)

rsvpd_to_event_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_rsvpd_to_event",
    description="Total number of users who have RSVPd to an event",
    statement=(
        select(func.count(distinct(EventOccurrenceAttendee.user_id)))
        .join(User, User.id == EventOccurrenceAttendee.user_id)
        .where(User.is_visible)
    ),
)
# number of BackgroundJob rows for which ready_for_retry holds
background_jobs_ready_to_execute_gauge: Gauge = _make_gauge_from_query(
    name="couchers_background_jobs_ready_to_execute",
    description="Total number of background jobs ready to execute",
    statement=select(func.count()).select_from(BackgroundJob).where(BackgroundJob.ready_for_retry),
)

# unlabelled counters describing what happens when a background worker tries to pick up a job
background_jobs_serialization_errors_counter: Counter = Counter(
    name="couchers_background_jobs_serialization_errors_total",
    documentation="Number of times a bg worker has a serialization error",
)

background_jobs_no_jobs_counter: Counter = Counter(
    name="couchers_background_jobs_no_jobs_total",
    documentation="Number of times a bg worker tries to grab a job but there is none",
)

background_jobs_got_job_counter: Counter = Counter(
    name="couchers_background_jobs_got_job_total",
    documentation="Number of times a bg worker grabbed a job",
)
# signup funnel: initiations are unlabelled, completions and signup time are labelled by gender
signup_initiations_counter: Counter = Counter(
    name="couchers_signup_initiations_total",
    documentation="Number of initiated signups",
)
signup_completions_counter: Counter = Counter(
    name="couchers_signup_completions_total",
    documentation="Number of completed signups",
    labelnames=["gender"],
)
# custom buckets from 30 seconds up to 2 hours
signup_time_histogram: Histogram = Histogram(
    name="couchers_signup_time_seconds",
    documentation="Time taken for a user to sign up",
    labelnames=["gender"],
    buckets=(
        30,
        60,
        90,
        120,
        180,
        240,
        300,
        360,
        420,
        480,
        540,
        600,
        900,
        1200,
        1800,
        3600,
        7200,
        _INF,
    ),
)
# logins, labelled by gender
logins_counter: Counter = Counter(
    name="couchers_logins_total",
    documentation="Number of logins",
    labelnames=["gender"],
)

# password reset flow, unlabelled
password_reset_initiations_counter: Counter = Counter(
    name="couchers_password_reset_initiations_total",
    documentation="Number of password reset initiations",
)
password_reset_completions_counter: Counter = Counter(
    name="couchers_password_reset_completions_total",
    documentation="Number of password reset completions",
)

# account deletion and recovery flow, labelled by gender
account_deletion_initiations_counter: Counter = Counter(
    name="couchers_account_deletion_initiations_total",
    documentation="Number of account deletion initiations",
    labelnames=["gender"],
)
account_deletion_completions_counter: Counter = Counter(
    name="couchers_account_deletion_completions_total",
    documentation="Number of account deletion completions",
    labelnames=["gender"],
)
account_recoveries_counter: Counter = Counter(
    name="couchers_account_recoveries_total",
    documentation="Number of account recoveries",
    labelnames=["gender"],
)
# strong verification flow; note that only initiations and data deletions carry a gender label, completions do not
strong_verification_initiations_counter: Counter = Counter(
    name="couchers_strong_verification_initiations_total",
    documentation="Number of strong verification initiations",
    labelnames=["gender"],
)
strong_verification_completions_counter: Counter = Counter(
    name="couchers_strong_verification_completions_total",
    documentation="Number of strong verification completions",
)
strong_verification_data_deletions_counter: Counter = Counter(
    name="couchers_strong_verification_data_deletions_total",
    documentation="Number of strong verification data deletions",
    labelnames=["gender"],
)
# host requests sent, labelled by the genders on both sides
host_requests_sent_counter: Counter = Counter(
    name="couchers_host_requests_total",
    documentation="Number of host requests sent",
    labelnames=["from_gender", "to_gender"],
)
# responses to host requests, labelled by both genders and the type of response
host_request_responses_counter: Counter = Counter(
    name="couchers_host_requests_responses_total",
    documentation="Number of responses to host requests",
    labelnames=["responder_gender", "other_gender", "response_type"],
)

# messages sent, labelled by gender and message type
sent_messages_counter: Counter = Counter(
    name="couchers_sent_messages_total",
    documentation="Number of messages sent",
    labelnames=["gender", "message_type"],
)
# push notification delivery attempts, labelled by platform and outcome
push_notification_counter: Counter = Counter(
    name="couchers_push_notification_total",
    documentation="Number of push notification delivery attempts",
    labelnames=["platform", "outcome"],
)
emails_counter: Counter = Counter(
    name="couchers_emails_total",
    documentation="Number of emails sent",
)

# recaptcha assessments, labelled by action
recaptchas_assessed_counter: Counter = Counter(
    name="couchers_recaptchas_assessed_total",
    documentation="Number of times a recaptcha assessment is created",
    labelnames=["action"],
)

# bucket bounds are 0, 0.05, 0.1, ..., 1.0 (21 bounds in steps of 1/20)
recaptcha_score_histogram: Histogram = Histogram(
    name="couchers_recaptcha_score",
    documentation="Score of recaptcha assessments",
    labelnames=["action"],
    buckets=tuple(step / 20 for step in range(21)),
)
# response time to host requests in seconds, with custom buckets from 1 minute up to 4 weeks
# NOTE: the week-based bounds used to be multiples of 602_000 s, which is not a week; one week is
# 7 * 86_400 = 604_800 s, so those bounds (and their `le` label values) are now exact
host_request_first_response_histogram: Histogram = Histogram(
    "couchers_host_request_first_response_seconds",
    "Response time to host requests",
    labelnames=["host_gender", "surfer_gender", "response_type"],
    buckets=(
        1 * 60,  # 1m
        2 * 60,  # 2m
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        5 * 86_400,  # 5d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        _INF,
    ),
)
# age in seconds of the account sending a host request, with custom buckets from 5 minutes up to 104 weeks
# NOTE: the week-based bounds used to be multiples of 602_000 s, which is not a week; one week is
# 7 * 86_400 = 604_800 s, so those bounds (and their `le` label values) are now exact
account_age_on_host_request_create_histogram: Histogram = Histogram(
    "couchers_account_age_on_host_request_create_histogram_seconds",
    "Age of account sending a host request",
    labelnames=["surfer_gender", "host_gender"],
    buckets=(
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        4 * 86_400,  # 4d
        5 * 86_400,  # 5d
        6 * 86_400,  # 6d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        5 * 604_800,  # 5w
        10 * 604_800,  # 10w
        25 * 604_800,  # 25w
        52 * 604_800,  # 52w
        104 * 604_800,  # 104w
        _INF,
    ),
)
def create_prometheus_server(port: int) -> Any:
    """
    Starts an HTTP server exposing the metrics registry on the given port and returns the server object

    This is a custom start method to fix the problem described in
    https://github.com/prometheus/client_python/issues/155

    The server runs in a daemon thread. On every request, each gauge registered in _set_hacky_gauges_funcs is set to
    the result of its function before the registry is serialized.
    """

    def app(environ: Any, start_response: Any) -> Any:
        # set hacky gauges
        for hacky_gauge, compute_value in _set_hacky_gauges_funcs:
            hacky_gauge.set(compute_value())

        payload = generate_latest(registry)
        headers = [("Content-type", CONTENT_TYPE_LATEST), ("Content-Length", str(len(payload)))]
        start_response("200 OK", headers)
        return [payload]

    httpd = exposition.make_server(  # type: ignore[attr-defined]
        "", port, app, server_class=exposition.ThreadingWSGIServer, handler_class=exposition._SilentHandler
    )
    # daemon thread, so the serving loop does not keep the process alive on exit
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    return httpd