Coverage for src/couchers/metrics.py: 100%
81 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-08-28 14:55 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-08-28 14:55 +0000
1import threading
2from datetime import timedelta
4from opentelemetry import trace
5from prometheus_client import (
6 CONTENT_TYPE_LATEST,
7 CollectorRegistry,
8 Counter,
9 Gauge,
10 Histogram,
11 exposition,
12 generate_latest,
13 multiprocess,
14)
15from prometheus_client.registry import CollectorRegistry
16from sqlalchemy.sql import distinct, func
18from couchers.db import session_scope
19from couchers.models import BackgroundJob, EventOccurrenceAttendee, HostingStatus, HostRequest, Message, Reference, User
20from couchers.sql import couchers_select as select
# positive infinity, used as the explicit final bucket bound of the histograms below
_INF = float("inf")

# NOTE: this rebinds the name `trace` from the opentelemetry module to a tracer for this module
trace = trace.get_tracer(__name__)

# dedicated registry for the metrics endpoint; the multiprocess collector registers itself on it
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)
# background job durations, labelled by job, status, attempt and exception
jobs_duration_histogram = Histogram(
    name="couchers_background_jobs_seconds",
    documentation="Durations of background jobs",
    labelnames=["job", "status", "attempt", "exception"],
)
def observe_in_jobs_duration_histogram(job_type, job_state, try_count, exception_name, duration_s):
    """
    Records duration_s in jobs_duration_histogram for the given job type, state, attempt and exception

    try_count is converted to a string before being used as the "attempt" label value
    """
    labelled = jobs_duration_histogram.labels(
        job=job_type,
        status=job_state,
        attempt=str(try_count),
        exception=exception_name,
    )
    labelled.observe(duration_s)
# queueing delay of background jobs; bucket bounds are in seconds, from 10 ms up to 1 hour
jobs_queued_histogram = Histogram(
    name="couchers_background_jobs_queued_seconds",
    documentation="Time background job spent queued before being picked up",
    buckets=(
        0.01,
        0.05,
        0.1,
        0.5,
        1.0,
        2.5,
        5.0,
        10,
        20,
        30,
        40,
        50,
        60,
        90,
        120,
        300,
        600,
        1800,
        3600,
        _INF,
    ),
)
# gRPC call processing durations, labelled by method, logged_in, code and exception
servicer_duration_histogram = Histogram(
    name="couchers_servicer_duration_seconds",
    documentation="Durations of processing gRPC calls",
    labelnames=["method", "logged_in", "code", "exception"],
)
def observe_in_servicer_duration_histogram(method, user_id, status_code, exception_type, duration_s):
    """
    Records duration_s in servicer_duration_histogram for the given method, status code and exception type

    The "logged_in" label is derived from user_id: it is True whenever user_id is not None
    """
    is_logged_in = user_id is not None
    labelled = servicer_duration_histogram.labels(
        method=method,
        logged_in=is_logged_in,
        code=status_code,
        exception=exception_type,
    )
    labelled.observe(duration_s)
# list of (gauge, function) pairs, where calling the function returns the value the gauge should be set to
# in multiprocess mode the python prometheus client does not support Gauge.set_function, so instead we hack around it
# and set each gauge just before collection with this
_set_hacky_gauges_funcs = []


def _make_gauge_from_query(name, description, statement):
    """
    Creates a gauge with the given name and description whose value comes from running statement

    statement should be a sqlalchemy SELECT statement that returns a single number

    The gauge and a function that runs the query are appended to _set_hacky_gauges_funcs; the gauge is returned
    """
    span_name = f"metric.{name}"

    def run_query():
        # each evaluation gets its own tracing span and its own database session
        with trace.start_as_current_span(span_name), session_scope() as session:
            result = session.execute(statement)
            return result.scalar_one()

    query_gauge = Gauge(name, description, multiprocess_mode="mostrecent")
    _set_hacky_gauges_funcs.append((query_gauge, run_query))
    return query_gauge
def _active_users_gauge(suffix, period, window):
    """
    Creates a gauge counting visible users whose last_active is more recent than now minus window

    suffix goes into the metric name, period into its description
    """
    cutoff = func.now() - window
    statement = select(func.count()).select_from(User).where(User.is_visible).where(User.last_active > cutoff)
    return _make_gauge_from_query(
        f"couchers_active_users_{suffix}",
        f"Number of active users in the last {period}",
        statement,
    )


# months are expressed as fixed day counts (31, 92, 183 and 365 days)
active_users_gauges = [
    _active_users_gauge("5m", "5 min", timedelta(minutes=5)),
    _active_users_gauge("24h", "24 hours", timedelta(hours=24)),
    _active_users_gauge("1month", "1 month", timedelta(days=31)),
    _active_users_gauge("3month", "3 months", timedelta(days=92)),
    _active_users_gauge("6month", "6 months", timedelta(days=183)),
    _active_users_gauge("12month", "12 months", timedelta(days=365)),
]
def _count_visible_users(*conditions):
    """
    Builds a SELECT that counts visible users, narrowed by each of the given WHERE conditions in order
    """
    statement = select(func.count()).select_from(User).where(User.is_visible)
    for condition in conditions:
        statement = statement.where(condition)
    return statement


users_gauge = _make_gauge_from_query(
    "couchers_users",
    "Total number of users",
    _count_visible_users(),
)

# gender breakdown
man_gauge = _make_gauge_from_query(
    "couchers_users_man",
    "Total number of users with gender 'Man'",
    _count_visible_users(User.gender == "Man"),
)

woman_gauge = _make_gauge_from_query(
    "couchers_users_woman",
    "Total number of users with gender 'Woman'",
    _count_visible_users(User.gender == "Woman"),
)

nonbinary_gauge = _make_gauge_from_query(
    "couchers_users_nonbinary",
    "Total number of users with gender 'Non-binary'",
    _count_visible_users(User.gender == "Non-binary"),
)

# hosting status breakdown
can_host_gauge = _make_gauge_from_query(
    "couchers_users_can_host",
    "Total number of users with hosting status 'can_host'",
    _count_visible_users(User.hosting_status == HostingStatus.can_host),
)

cant_host_gauge = _make_gauge_from_query(
    "couchers_users_cant_host",
    "Total number of users with hosting status 'cant_host'",
    _count_visible_users(User.hosting_status == HostingStatus.cant_host),
)

maybe_gauge = _make_gauge_from_query(
    "couchers_users_maybe",
    "Total number of users with hosting status 'maybe'",
    _count_visible_users(User.hosting_status == HostingStatus.maybe),
)

# profile completeness
completed_profile_gauge = _make_gauge_from_query(
    "couchers_users_completed_profile",
    "Total number of users with a completed profile",
    _count_visible_users(User.has_completed_profile),
)

completed_my_home_gauge = _make_gauge_from_query(
    "couchers_users_completed_my_home",
    "Total number of users with a completed my home section",
    _count_visible_users(User.has_completed_my_home),
)
def _count_distinct_visible_users(user_id_column):
    """
    Builds a SELECT counting the distinct values of user_id_column that belong to visible users

    user_id_column is joined against User.id to apply the visibility filter
    """
    distinct_users = func.count(distinct(user_id_column))
    return select(distinct_users).join(User, User.id == user_id_column).where(User.is_visible)


sent_message_gauge = _make_gauge_from_query(
    "couchers_users_sent_message",
    "Total number of users who have sent a message",
    _count_distinct_visible_users(Message.author_id),
)

sent_request_gauge = _make_gauge_from_query(
    "couchers_users_sent_request",
    "Total number of users who have sent a host request",
    _count_distinct_visible_users(HostRequest.surfer_user_id),
)

has_reference_gauge = _make_gauge_from_query(
    "couchers_users_has_reference",
    "Total number of users who have a reference",
    _count_distinct_visible_users(Reference.to_user_id),
)

rsvpd_to_event_gauge = _make_gauge_from_query(
    "couchers_users_rsvpd_to_event",
    "Total number of users who have RSVPd to an event",
    _count_distinct_visible_users(EventOccurrenceAttendee.user_id),
)
# counts BackgroundJob rows for which BackgroundJob.ready_for_retry holds
background_jobs_ready_to_execute_gauge = _make_gauge_from_query(
    name="couchers_background_jobs_ready_to_execute",
    description="Total number of background jobs ready to execute",
    statement=select(func.count()).select_from(BackgroundJob).where(BackgroundJob.ready_for_retry),
)
# background worker counters
background_jobs_serialization_errors_counter = Counter(
    name="couchers_background_jobs_serialization_errors_total",
    documentation="Number of times a bg worker has a serialization error",
)

background_jobs_no_jobs_counter = Counter(
    name="couchers_background_jobs_no_jobs_total",
    documentation="Number of times a bg worker tries to grab a job but there is none",
)

background_jobs_got_job_counter = Counter(
    name="couchers_background_jobs_got_job_total",
    documentation="Number of times a bg worker grabbed a job",
)
# signup funnel
signup_initiations_counter = Counter(
    name="couchers_signup_initiations_total",
    documentation="Number of initiated signups",
)
signup_completions_counter = Counter(
    name="couchers_signup_completions_total",
    documentation="Number of completed signups",
    labelnames=["gender"],
)
# bucket bounds are in seconds, from 30 seconds up to 2 hours
signup_time_histogram = Histogram(
    name="couchers_signup_time_seconds",
    documentation="Time taken for a user to sign up",
    labelnames=["gender"],
    buckets=(
        30,
        60,
        90,
        120,
        180,
        240,
        300,
        360,
        420,
        480,
        540,
        600,
        900,
        1200,
        1800,
        3600,
        7200,
        _INF,
    ),
)
# logins
logins_counter = Counter(
    name="couchers_logins_total",
    documentation="Number of logins",
    labelnames=["gender"],
)

# password resets (no labels)
password_reset_initiations_counter = Counter(
    name="couchers_password_reset_initiations_total",
    documentation="Number of password reset initiations",
)
password_reset_completions_counter = Counter(
    name="couchers_password_reset_completions_total",
    documentation="Number of password reset completions",
)

# account deletion and recovery
account_deletion_initiations_counter = Counter(
    name="couchers_account_deletion_initiations_total",
    documentation="Number of account deletion initiations",
    labelnames=["gender"],
)
account_deletion_completions_counter = Counter(
    name="couchers_account_deletion_completions_total",
    documentation="Number of account deletion completions",
    labelnames=["gender"],
)
account_recoveries_counter = Counter(
    name="couchers_account_recoveries_total",
    documentation="Number of account recoveries",
    labelnames=["gender"],
)
# strong verification
strong_verification_initiations_counter = Counter(
    name="couchers_strong_verification_initiations_total",
    documentation="Number of strong verification initiations",
    labelnames=["gender"],
)
# NOTE: unlike the initiations and data deletions counters, this one has no "gender" label
strong_verification_completions_counter = Counter(
    name="couchers_strong_verification_completions_total",
    documentation="Number of strong verification completions",
)
strong_verification_data_deletions_counter = Counter(
    name="couchers_strong_verification_data_deletions_total",
    documentation="Number of strong verification data deletions",
    labelnames=["gender"],
)
# host requests
host_requests_sent_counter = Counter(
    name="couchers_host_requests_total",
    documentation="Number of host requests sent",
    labelnames=["from_gender", "to_gender"],
)
host_request_responses_counter = Counter(
    name="couchers_host_requests_responses_total",
    documentation="Number of responses to host requests",
    labelnames=["responder_gender", "other_gender", "response_type"],
)

# messaging
sent_messages_counter = Counter(
    name="couchers_sent_messages_total",
    documentation="Number of messages sent",
    labelnames=["gender", "message_type"],
)
# outbound notifications
push_notification_counter = Counter(
    name="couchers_push_notification_total",
    documentation="Number of push notifications sent",
)
push_notification_disabled_counter = Counter(
    name="couchers_push_notification_disabled_total",
    documentation="Number of push notifications that were disabled due to failure to send",
)
emails_counter = Counter(
    name="couchers_emails_total",
    documentation="Number of emails sent",
)
# recaptcha
recaptchas_assessed_counter = Counter(
    name="couchers_recaptchas_assessed_total",
    documentation="Number of times a recaptcha assessment is created",
    labelnames=["action"],
)

# 21 evenly spaced bucket bounds: 0.0, 0.05, 0.1, ..., 1.0
recaptcha_score_histogram = Histogram(
    name="couchers_recaptcha_score",
    documentation="Score of recaptcha assessments",
    labelnames=["action"],
    buckets=tuple(step / 20 for step in range(21)),
)
# Response time to host requests, labelled by host gender, surfer gender and response type.
# Bucket bounds are in seconds. The week-based bounds are multiples of 604_800 (7 * 86_400); they used to be
# multiples of 602_000, which is not a whole week, so the exported `le` values for 1w-4w change with this fix.
host_request_first_response_histogram = Histogram(
    "couchers_host_request_first_response_seconds",
    "Response time to host requests",
    labelnames=["host_gender", "surfer_gender", "response_type"],
    buckets=(
        1 * 60,  # 1m
        2 * 60,  # 2m
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        5 * 86_400,  # 5d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        _INF,
    ),
)
# Age of the account sending a host request, labelled by surfer gender and host gender.
# Bucket bounds are in seconds. The week-based bounds are multiples of 604_800 (7 * 86_400); they used to be
# multiples of 602_000, which is not a whole week, so the exported `le` values for 1w-104w change with this fix.
account_age_on_host_request_create_histogram = Histogram(
    "couchers_account_age_on_host_request_create_histogram_seconds",
    "Age of account sending a host request",
    labelnames=["surfer_gender", "host_gender"],
    buckets=(
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        4 * 86_400,  # 4d
        5 * 86_400,  # 5d
        6 * 86_400,  # 6d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        5 * 604_800,  # 5w
        10 * 604_800,  # 10w
        25 * 604_800,  # 25w
        52 * 604_800,  # 52w
        104 * 604_800,  # 104w
        _INF,
    ),
)
def create_prometheus_server(port):
    """
    Starts a metrics HTTP server on the given port in a daemon thread and returns the server object

    custom start method to fix problem described in https://github.com/prometheus/client_python/issues/155
    """

    def metrics_app(environ, start_response):
        # refresh every query-backed gauge right before the metrics are collected
        for query_gauge, compute_value in _set_hacky_gauges_funcs:
            query_gauge.set(compute_value())

        payload = generate_latest(registry)
        headers = [
            ("Content-type", CONTENT_TYPE_LATEST),
            ("Content-Length", str(len(payload))),
        ]
        start_response("200 OK", headers)
        return [payload]

    server = exposition.make_server(
        "",
        port,
        metrics_app,
        exposition.ThreadingWSGIServer,
        handler_class=exposition._SilentHandler,
    )
    # daemon thread, so serving metrics never keeps the process alive on its own
    threading.Thread(target=server.serve_forever, daemon=True).start()
    return server