Coverage for src/couchers/metrics.py: 100%
72 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-03-24 14:08 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-03-24 14:08 +0000
1import threading
2from datetime import timedelta
4from prometheus_client import (
5 CONTENT_TYPE_LATEST,
6 CollectorRegistry,
7 Counter,
8 Gauge,
9 Histogram,
10 exposition,
11 generate_latest,
12 multiprocess,
13)
14from prometheus_client.registry import CollectorRegistry
15from sqlalchemy.sql import func
17from couchers.db import session_scope
18from couchers.models import BackgroundJob, EventOccurrenceAttendee, HostingStatus, HostRequest, Message, Reference, User
19from couchers.sql import couchers_select as select
# Registry handed to generate_latest() when metrics are served; the multiprocess collector is attached to it.
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry=registry)

# Positive infinity, used as the explicit last bucket bound of the histograms in this module.
_INF = float("inf")

# Durations of background jobs, labelled by job, status, attempt and exception.
jobs_duration_histogram = Histogram(
    name="couchers_background_jobs_seconds",
    documentation="Durations of background jobs",
    labelnames=("job", "status", "attempt", "exception"),
)
def observe_in_jobs_duration_histogram(job_type, job_state, try_count, exception_name, duration_s):
    """
    Records the duration of one background job run in jobs_duration_histogram

    Label values are passed positionally: job_type, job_state, the stringified try_count and exception_name.
    duration_s is the value that gets observed.
    """
    attempt_label = str(try_count)
    labelled_histogram = jobs_duration_histogram.labels(job_type, job_state, attempt_label, exception_name)
    labelled_histogram.observe(duration_s)
# Time a background job spent queued before being picked up; explicit bucket bounds ending at +Inf.
jobs_queued_histogram = Histogram(
    name="couchers_background_jobs_queued_seconds",
    documentation="Time background job spent queued before being picked up",
    buckets=(
        0.01,
        0.05,
        0.1,
        0.5,
        1.0,
        2.5,
        5.0,
        10,
        20,
        30,
        40,
        50,
        60,
        90,
        120,
        300,
        600,
        1800,
        3600,
        _INF,
    ),
)

# Durations of processing gRPC calls, labelled by method, logged_in, code and exception.
servicer_duration_histogram = Histogram(
    name="couchers_servicer_duration_seconds",
    documentation="Durations of processing gRPC calls",
    labelnames=("method", "logged_in", "code", "exception"),
)
def observe_in_servicer_duration_histogram(method, user_id, status_code, exception_type, duration_s):
    """
    Records the duration of one gRPC call in servicer_duration_histogram

    The second label value is a boolean: whether user_id is not None. The remaining label values (method,
    status_code, exception_type) are passed through as given. duration_s is the value that gets observed.
    """
    logged_in = user_id is not None
    labelled_histogram = servicer_duration_histogram.labels(method, logged_in, status_code, exception_type)
    labelled_histogram.observe(duration_s)
55# list of gauge names and function to execute to set value to
56# the python prometheus client does not support Gauge.set_function, so instead we hack around it and set each gauge just
57# before collection with this
58_set_hacky_gauges_funcs = []
def _make_gauge_from_query(name, description, statement):
    """
    Creates a gauge whose value comes from running a sqlalchemy statement

    statement should be a sqlalchemy SELECT statement that returns a single number

    The gauge is created with multiprocess_mode="mostrecent" and is registered, together with a function that
    executes the statement, in _set_hacky_gauges_funcs. The created gauge is returned.
    """

    def run_query():
        # open a fresh session for every evaluation and return the single scalar result
        with session_scope() as session:
            result = session.execute(statement)
            return result.scalar_one()

    query_gauge = Gauge(name, description, multiprocess_mode="mostrecent")
    _set_hacky_gauges_funcs.append((query_gauge, run_query))
    return query_gauge
# One gauge per time window: number of visible users whose last_active is more recent than now minus the window.
active_users_gauges = [
    _make_gauge_from_query(
        name=f"couchers_active_users_{suffix}",
        description=f"Number of active users in the last {window_label}",
        statement=(
            select(func.count())
            .select_from(User)
            .where(User.is_visible)
            .where(User.last_active > func.now() - window)
        ),
    )
    for suffix, window_label, window in (
        ("5m", "5 min", timedelta(minutes=5)),
        ("24h", "24 hours", timedelta(hours=24)),
        ("1month", "1 month", timedelta(days=31)),
        ("3month", "3 months", timedelta(days=92)),
        ("6month", "6 months", timedelta(days=183)),
        ("12month", "12 months", timedelta(days=365)),
    )
]
# All gauges below count rows of User restricted to visible users, optionally narrowed by one more condition.

users_gauge = _make_gauge_from_query(
    name="couchers_users",
    description="Total number of users",
    statement=select(func.count()).select_from(User).where(User.is_visible),
)

# --- by gender ---

man_gauge = _make_gauge_from_query(
    name="couchers_users_man",
    description="Total number of users with gender 'Man'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Man")
    ),
)

woman_gauge = _make_gauge_from_query(
    name="couchers_users_woman",
    description="Total number of users with gender 'Woman'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Woman")
    ),
)

nonbinary_gauge = _make_gauge_from_query(
    name="couchers_users_nonbinary",
    description="Total number of users with gender 'Non-binary'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Non-binary")
    ),
)

# --- by hosting status ---

can_host_gauge = _make_gauge_from_query(
    name="couchers_users_can_host",
    description="Total number of users with hosting status 'can_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.can_host)
    ),
)

cant_host_gauge = _make_gauge_from_query(
    name="couchers_users_cant_host",
    description="Total number of users with hosting status 'cant_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.cant_host)
    ),
)

maybe_gauge = _make_gauge_from_query(
    name="couchers_users_maybe",
    description="Total number of users with hosting status 'maybe'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.maybe)
    ),
)

# --- by profile completion ---

completed_profile_gauge = _make_gauge_from_query(
    name="couchers_users_completed_profile",
    description="Total number of users with a completed profile",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.has_completed_profile)
    ),
)
def _count_visible_users_joined(target, onclause):
    """
    Builds a SELECT that counts visible users having at least one joined row in target

    The inner query selects User.id for visible users joined to target on onclause and groups by User.id, so each
    user appears once; the outer query counts the rows of that subquery.
    """
    matching_users = (
        select(User.id)
        .where(User.is_visible)
        .join(target, onclause)
        .group_by(User.id)
        .subquery()
    )
    return select(func.count()).select_from(matching_users)


sent_message_gauge = _make_gauge_from_query(
    name="couchers_users_sent_message",
    description="Total number of users who have sent a message",
    statement=_count_visible_users_joined(Message, Message.author_id == User.id),
)

sent_request_gauge = _make_gauge_from_query(
    name="couchers_users_sent_request",
    description="Total number of users who have sent a host request",
    statement=_count_visible_users_joined(HostRequest, HostRequest.surfer_user_id == User.id),
)

has_reference_gauge = _make_gauge_from_query(
    name="couchers_users_has_reference",
    description="Total number of users who have a reference",
    statement=_count_visible_users_joined(Reference, Reference.to_user_id == User.id),
)

rsvpd_to_event_gauge = _make_gauge_from_query(
    name="couchers_users_rsvpd_to_event",
    description="Total number of users who have RSVPd to an event",
    statement=_count_visible_users_joined(EventOccurrenceAttendee, EventOccurrenceAttendee.user_id == User.id),
)
# --- background worker metrics ---

# Query-backed gauge: count of BackgroundJob rows for which ready_for_retry holds.
background_jobs_ready_to_execute_gauge = _make_gauge_from_query(
    name="couchers_background_jobs_ready_to_execute",
    description="Total number of background jobs ready to execute",
    statement=(
        select(func.count())
        .select_from(BackgroundJob)
        .where(BackgroundJob.ready_for_retry)
    ),
)

background_jobs_serialization_errors_counter = Counter(
    name="couchers_background_jobs_serialization_errors_total",
    documentation="Number of times a bg worker has a serialization error",
)

background_jobs_no_jobs_counter = Counter(
    name="couchers_background_jobs_no_jobs_total",
    documentation="Number of times a bg worker tries to grab a job but there is none",
)

background_jobs_got_job_counter = Counter(
    name="couchers_background_jobs_got_job_total",
    documentation="Number of times a bg worker grabbed a job",
)
# --- signup and login metrics ---

signup_initiations_counter = Counter(
    name="couchers_signup_initiations_total",
    documentation="Number of initiated signups",
)
signup_completions_counter = Counter(
    name="couchers_signup_completions_total",
    documentation="Number of completed signups",
    labelnames=("gender",),
)
# Bucket bounds are in seconds (the metric name ends in _seconds): 30 s up to 7200 s, then +Inf.
signup_time_histogram = Histogram(
    name="couchers_signup_time_seconds",
    documentation="Time taken for a user to sign up",
    labelnames=("gender",),
    buckets=(
        30,
        60,
        90,
        120,
        180,
        240,
        300,
        360,
        420,
        480,
        540,
        600,
        900,
        1200,
        1800,
        3600,
        7200,
        _INF,
    ),
)

logins_counter = Counter(
    name="couchers_logins_total",
    documentation="Number of logins",
    labelnames=("gender",),
)
# --- password reset metrics (unlabelled) ---

password_reset_initiations_counter = Counter(
    name="couchers_password_reset_initiations_total",
    documentation="Number of password reset initiations",
)
password_reset_completions_counter = Counter(
    name="couchers_password_reset_completions_total",
    documentation="Number of password reset completions",
)

# --- account deletion / recovery metrics (labelled by gender) ---

account_deletion_initiations_counter = Counter(
    name="couchers_account_deletion_initiations_total",
    documentation="Number of account deletion initiations",
    labelnames=("gender",),
)
account_deletion_completions_counter = Counter(
    name="couchers_account_deletion_completions_total",
    documentation="Number of account deletion completions",
    labelnames=("gender",),
)
account_recoveries_counter = Counter(
    name="couchers_account_recoveries_total",
    documentation="Number of account recoveries",
    labelnames=("gender",),
)

# --- strong verification metrics ---
# note: the completions counter has no labels, unlike the initiations and data deletions counters

strong_verification_initiations_counter = Counter(
    name="couchers_strong_verification_initiations_total",
    documentation="Number of strong verification initiations",
    labelnames=("gender",),
)
strong_verification_completions_counter = Counter(
    name="couchers_strong_verification_completions_total",
    documentation="Number of strong verification completions",
)
strong_verification_data_deletions_counter = Counter(
    name="couchers_strong_verification_data_deletions_total",
    documentation="Number of strong verification data deletions",
    labelnames=("gender",),
)
# --- host request and messaging counters ---

host_requests_sent_counter = Counter(
    name="couchers_host_requests_total",
    documentation="Number of host requests sent",
    labelnames=("from_gender", "to_gender"),
)
host_request_responses_counter = Counter(
    name="couchers_host_requests_responses_total",
    documentation="Number of responses to host requests",
    labelnames=("responder_gender", "other_gender", "response_type"),
)

sent_messages_counter = Counter(
    name="couchers_sent_messages_total",
    documentation="Number of messages sent",
    labelnames=("gender", "message_type"),
)
# Response time to host requests, labelled by host gender, surfer gender and response type.
# Bucket upper bounds are in seconds; one week is 7 * 86_400 = 604_800 seconds.
host_request_first_response_histogram = Histogram(
    "couchers_host_request_first_response_seconds",
    "Response time to host requests",
    labelnames=["host_gender", "surfer_gender", "response_type"],
    buckets=(
        1 * 60,  # 1m
        2 * 60,  # 2m
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        5 * 86_400,  # 5d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        _INF,
    ),
)
# Age of the account sending a host request, labelled by surfer gender and host gender.
# Bucket upper bounds are in seconds; one week is 7 * 86_400 = 604_800 seconds.
account_age_on_host_request_create_histogram = Histogram(
    "couchers_account_age_on_host_request_create_histogram_seconds",
    "Age of account sending a host request",
    labelnames=["surfer_gender", "host_gender"],
    buckets=(
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        4 * 86_400,  # 4d
        5 * 86_400,  # 5d
        6 * 86_400,  # 6d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        5 * 604_800,  # 5w
        10 * 604_800,  # 10w
        25 * 604_800,  # 25w
        52 * 604_800,  # 52w
        104 * 604_800,  # 104w
        _INF,
    ),
)
def create_prometheus_server(port):
    """
    Starts an HTTP server on the given port that serves the metrics collected in registry

    Custom start method to fix the problem described in https://github.com/prometheus/client_python/issues/155

    The server runs in a daemon thread; the server object is returned to the caller.
    """

    def metrics_app(environ, start_response):
        # refresh the query-backed gauges so the response reflects their current values
        for query_gauge, compute_value in _set_hacky_gauges_funcs:
            query_gauge.set(compute_value())

        payload = generate_latest(registry)
        headers = [
            ("Content-type", CONTENT_TYPE_LATEST),
            ("Content-Length", str(len(payload))),
        ]
        start_response("200 OK", headers)
        return [payload]

    server = exposition.make_server(
        "",
        port,
        metrics_app,
        exposition.ThreadingWSGIServer,
        handler_class=exposition._SilentHandler,
    )
    serving_thread = threading.Thread(target=server.serve_forever, daemon=True)
    serving_thread.start()
    return server