Coverage for src/couchers/metrics.py: 100%
80 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-07-05 23:21 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-07-05 23:21 +0000
1import threading
2from datetime import timedelta
4from opentelemetry import trace
5from prometheus_client import (
6 CONTENT_TYPE_LATEST,
7 CollectorRegistry,
8 Counter,
9 Gauge,
10 Histogram,
11 exposition,
12 generate_latest,
13 multiprocess,
14)
15from prometheus_client.registry import CollectorRegistry
16from sqlalchemy.sql import distinct, func
18from couchers.db import session_scope
19from couchers.models import BackgroundJob, EventOccurrenceAttendee, HostingStatus, HostRequest, Message, Reference, User
20from couchers.sql import couchers_select as select
# Module-level tracer. NOTE: this rebinds the name `trace`, so from here on it refers to the tracer rather than
# the opentelemetry `trace` module imported above.
trace = trace.get_tracer(__name__)

# Registry handed to MultiProcessCollector; this is also the registry the metrics endpoint generates its output from
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)

# Explicit upper bound used as the last entry of the custom bucket tuples below
_INF = float("inf")

# Durations of background job runs, labelled by job, status, attempt and exception
jobs_duration_histogram = Histogram(
    name="couchers_background_jobs_seconds",
    documentation="Durations of background jobs",
    labelnames=("job", "status", "attempt", "exception"),
)
def observe_in_jobs_duration_histogram(job_type, job_state, try_count, exception_name, duration_s):
    """
    Records duration_s in the background jobs duration histogram

    The label values are, in order: job_type, job_state, try_count (converted to a string) and exception_name
    """
    label_values = (job_type, job_state, str(try_count), exception_name)
    jobs_duration_histogram.labels(*label_values).observe(duration_s)
# Queue latency of background jobs; unlabelled, with custom buckets running from 10 ms to 1 hour
jobs_queued_histogram = Histogram(
    name="couchers_background_jobs_queued_seconds",
    documentation="Time background job spent queued before being picked up",
    buckets=(0.01, 0.05, 0.1, 0.5, 1.0, 2.5, 5.0, 10, 20, 30, 40, 50, 60, 90, 120, 300, 600, 1800, 3600, _INF),
)

# Processing time of gRPC calls, labelled by method, logged_in, code and exception
servicer_duration_histogram = Histogram(
    name="couchers_servicer_duration_seconds",
    documentation="Durations of processing gRPC calls",
    labelnames=("method", "logged_in", "code", "exception"),
)
def observe_in_servicer_duration_histogram(method, user_id, status_code, exception_type, duration_s):
    """
    Records duration_s in the servicer duration histogram

    The `logged_in` label is derived from user_id: it is True whenever user_id is not None. The remaining labels
    (method, status_code, exception_type) are passed through as given.
    """
    logged_in = user_id is not None
    labelled_histogram = servicer_duration_histogram.labels(method, logged_in, status_code, exception_type)
    labelled_histogram.observe(duration_s)
# list of (gauge, function) pairs, where the function is executed to get the value to set the gauge to
# per the original note here, the python prometheus client does not support Gauge.set_function, so instead we hack
# around it and set each of these gauges just before collection
_set_hacky_gauges_funcs = []


def _make_gauge_from_query(name, description, statement):
    """
    Creates a gauge called `name` whose value comes from executing `statement`

    statement should be a sqlalchemy SELECT statement that returns a single number

    The gauge is returned, and is also appended to `_set_hacky_gauges_funcs` together with the function that runs
    the query, so that its value can be set just before collection.
    """

    def _run_query():
        # one tracing span per metric query, with a session obtained from session_scope()
        with trace.start_as_current_span(f"metric.{name}"), session_scope() as session:
            result = session.execute(statement)
            return result.scalar_one()

    query_gauge = Gauge(name=name, documentation=description, multiprocess_mode="mostrecent")
    _set_hacky_gauges_funcs.append((query_gauge, _run_query))
    return query_gauge
# One gauge per activity window: counts visible users whose last_active falls within the window before now
active_users_gauges = [
    _make_gauge_from_query(
        name=f"couchers_active_users_{suffix}",
        description=f"Number of active users in the last {human_readable}",
        statement=(
            select(func.count())
            .select_from(User)
            .where(User.is_visible)
            .where(User.last_active > func.now() - window)
        ),
    )
    for suffix, human_readable, window in [
        ("5m", "5 min", timedelta(minutes=5)),
        ("24h", "24 hours", timedelta(hours=24)),
        ("1month", "1 month", timedelta(days=31)),
        ("3month", "3 months", timedelta(days=92)),
        ("6month", "6 months", timedelta(days=183)),
        ("12month", "12 months", timedelta(days=365)),
    ]
]
# Count of all visible users
users_gauge = _make_gauge_from_query(
    name="couchers_users",
    description="Total number of users",
    statement=select(func.count()).select_from(User).where(User.is_visible),
)

# Counts of visible users by the value of their gender field
man_gauge = _make_gauge_from_query(
    name="couchers_users_man",
    description="Total number of users with gender 'Man'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Man")
    ),
)

woman_gauge = _make_gauge_from_query(
    name="couchers_users_woman",
    description="Total number of users with gender 'Woman'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Woman")
    ),
)

nonbinary_gauge = _make_gauge_from_query(
    name="couchers_users_nonbinary",
    description="Total number of users with gender 'Non-binary'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Non-binary")
    ),
)
# Counts of visible users by hosting status
can_host_gauge = _make_gauge_from_query(
    name="couchers_users_can_host",
    description="Total number of users with hosting status 'can_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.can_host)
    ),
)

cant_host_gauge = _make_gauge_from_query(
    name="couchers_users_cant_host",
    description="Total number of users with hosting status 'cant_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.cant_host)
    ),
)

maybe_gauge = _make_gauge_from_query(
    name="couchers_users_maybe",
    description="Total number of users with hosting status 'maybe'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.maybe)
    ),
)

# Count of visible users for which User.has_completed_profile holds
completed_profile_gauge = _make_gauge_from_query(
    name="couchers_users_completed_profile",
    description="Total number of users with a completed profile",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.has_completed_profile)
    ),
)
# The gauges below each count distinct user ids in an activity table, joined to User so that only visible users
# are included

sent_message_gauge = _make_gauge_from_query(
    name="couchers_users_sent_message",
    description="Total number of users who have sent a message",
    statement=(
        select(func.count(distinct(Message.author_id)))
        .join(User, User.id == Message.author_id)
        .where(User.is_visible)
    ),
)

sent_request_gauge = _make_gauge_from_query(
    name="couchers_users_sent_request",
    description="Total number of users who have sent a host request",
    statement=(
        select(func.count(distinct(HostRequest.surfer_user_id)))
        .join(User, User.id == HostRequest.surfer_user_id)
        .where(User.is_visible)
    ),
)

has_reference_gauge = _make_gauge_from_query(
    name="couchers_users_has_reference",
    description="Total number of users who have a reference",
    statement=(
        select(func.count(distinct(Reference.to_user_id)))
        .join(User, User.id == Reference.to_user_id)
        .where(User.is_visible)
    ),
)

rsvpd_to_event_gauge = _make_gauge_from_query(
    name="couchers_users_rsvpd_to_event",
    description="Total number of users who have RSVPd to an event",
    statement=(
        select(func.count(distinct(EventOccurrenceAttendee.user_id)))
        .join(User, User.id == EventOccurrenceAttendee.user_id)
        .where(User.is_visible)
    ),
)
# Number of BackgroundJob rows for which BackgroundJob.ready_for_retry holds
background_jobs_ready_to_execute_gauge = _make_gauge_from_query(
    name="couchers_background_jobs_ready_to_execute",
    description="Total number of background jobs ready to execute",
    statement=(
        select(func.count())
        .select_from(BackgroundJob)
        .where(BackgroundJob.ready_for_retry)
    ),
)

# Unlabelled counters for background worker events
background_jobs_serialization_errors_counter = Counter(
    name="couchers_background_jobs_serialization_errors_total",
    documentation="Number of times a bg worker has a serialization error",
)

background_jobs_no_jobs_counter = Counter(
    name="couchers_background_jobs_no_jobs_total",
    documentation="Number of times a bg worker tries to grab a job but there is none",
)

background_jobs_got_job_counter = Counter(
    name="couchers_background_jobs_got_job_total",
    documentation="Number of times a bg worker grabbed a job",
)
# Signup funnel: initiations are unlabelled, completions and time taken are labelled by gender
signup_initiations_counter = Counter(
    name="couchers_signup_initiations_total",
    documentation="Number of initiated signups",
)
signup_completions_counter = Counter(
    name="couchers_signup_completions_total",
    documentation="Number of completed signups",
    labelnames=("gender",),
)
signup_time_histogram = Histogram(
    name="couchers_signup_time_seconds",
    documentation="Time taken for a user to sign up",
    labelnames=("gender",),
    # bucket bounds in seconds, from 30 s up to 2 hours
    buckets=(30, 60, 90, 120, 180, 240, 300, 360, 420, 480, 540, 600, 900, 1200, 1800, 3600, 7200, _INF),
)

logins_counter = Counter(
    name="couchers_logins_total",
    documentation="Number of logins",
    labelnames=("gender",),
)
# Password resets (unlabelled)
password_reset_initiations_counter = Counter(
    name="couchers_password_reset_initiations_total",
    documentation="Number of password reset initiations",
)
password_reset_completions_counter = Counter(
    name="couchers_password_reset_completions_total",
    documentation="Number of password reset completions",
)

# Account deletion and recovery, labelled by gender
account_deletion_initiations_counter = Counter(
    name="couchers_account_deletion_initiations_total",
    documentation="Number of account deletion initiations",
    labelnames=("gender",),
)
account_deletion_completions_counter = Counter(
    name="couchers_account_deletion_completions_total",
    documentation="Number of account deletion completions",
    labelnames=("gender",),
)
account_recoveries_counter = Counter(
    name="couchers_account_recoveries_total",
    documentation="Number of account recoveries",
    labelnames=("gender",),
)

# Strong verification; note that the completions counter carries no gender label, unlike the other two
strong_verification_initiations_counter = Counter(
    name="couchers_strong_verification_initiations_total",
    documentation="Number of strong verification initiations",
    labelnames=("gender",),
)
strong_verification_completions_counter = Counter(
    name="couchers_strong_verification_completions_total",
    documentation="Number of strong verification completions",
)
strong_verification_data_deletions_counter = Counter(
    name="couchers_strong_verification_data_deletions_total",
    documentation="Number of strong verification data deletions",
    labelnames=("gender",),
)
# Host requests sent, and responses to them, labelled by the genders of the two parties
host_requests_sent_counter = Counter(
    name="couchers_host_requests_total",
    documentation="Number of host requests sent",
    labelnames=("from_gender", "to_gender"),
)
host_request_responses_counter = Counter(
    name="couchers_host_requests_responses_total",
    documentation="Number of responses to host requests",
    labelnames=("responder_gender", "other_gender", "response_type"),
)

sent_messages_counter = Counter(
    name="couchers_sent_messages_total",
    documentation="Number of messages sent",
    labelnames=("gender", "message_type"),
)

# Outgoing notifications (unlabelled)
push_notification_counter = Counter(
    name="couchers_push_notification_total",
    documentation="Number of push notifications sent",
)
push_notification_disabled_counter = Counter(
    name="couchers_push_notification_disabled_total",
    documentation="Number of push notifications that were disabled due to failure to send",
)
emails_counter = Counter(
    name="couchers_emails_total",
    documentation="Number of emails sent",
)
# Recaptcha assessments, labelled by action
recaptchas_assessed_counter = Counter(
    name="couchers_recaptchas_assessed_total",
    documentation="Number of times a recaptcha assessment is created",
    labelnames=("action",),
)

recaptcha_score_histogram = Histogram(
    name="couchers_recaptcha_score",
    documentation="Score of recaptcha assessments",
    labelnames=("action",),
    # 21 bucket bounds: 0/20, 1/20, ..., 20/20, i.e. 0.0 to 1.0 in steps of 0.05
    buckets=tuple(step / 20 for step in range(21)),
)
# Response time to host requests, labelled by host gender, surfer gender and response type. Bucket bounds are in
# seconds and run from 1 minute to 4 weeks.
# NOTE: the week-based bounds used to be multiples of 602_000 s, but one week is 7 * 86_400 = 604_800 s. Correcting
# them changes the `le` label of those bucket series, so anything pinned to the old bounds needs updating.
host_request_first_response_histogram = Histogram(
    "couchers_host_request_first_response_seconds",
    "Response time to host requests",
    labelnames=["host_gender", "surfer_gender", "response_type"],
    buckets=(
        1 * 60,  # 1m
        2 * 60,  # 2m
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        5 * 86_400,  # 5d (this comment used to say 4d; the bound itself is unchanged)
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        _INF,
    ),
)
# Age of the account sending a host request, labelled by surfer gender and host gender. Bucket bounds are in
# seconds and run from 5 minutes to 104 weeks.
# NOTE: the week-based bounds used to be multiples of 602_000 s, but one week is 7 * 86_400 = 604_800 s. Correcting
# them changes the `le` label of those bucket series, so anything pinned to the old bounds needs updating.
account_age_on_host_request_create_histogram = Histogram(
    "couchers_account_age_on_host_request_create_histogram_seconds",
    "Age of account sending a host request",
    labelnames=["surfer_gender", "host_gender"],
    buckets=(
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        4 * 86_400,  # 4d
        5 * 86_400,  # 5d
        6 * 86_400,  # 6d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        5 * 604_800,  # 5w
        10 * 604_800,  # 10w
        25 * 604_800,  # 25w
        52 * 604_800,  # 52w
        104 * 604_800,  # 104w
        _INF,
    ),
)
def create_prometheus_server(port):
    """
    Starts an HTTP server on `port` that serves the metrics in `registry`, and returns the server object

    The server runs `serve_forever` in a daemon thread. This is a custom start method to fix the problem described
    in https://github.com/prometheus/client_python/issues/155
    """

    def metrics_app(environ, start_response):
        # set hacky gauges: run each registered function and store its result in the matching gauge
        for query_gauge, fetch_value in _set_hacky_gauges_funcs:
            query_gauge.set(fetch_value())

        payload = generate_latest(registry)
        response_headers = [
            ("Content-type", CONTENT_TYPE_LATEST),
            ("Content-Length", str(len(payload))),
        ]
        start_response("200 OK", response_headers)
        return [payload]

    httpd = exposition.make_server(
        "",
        port,
        metrics_app,
        exposition.ThreadingWSGIServer,
        handler_class=exposition._SilentHandler,
    )
    server_thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    server_thread.start()
    return httpd