Coverage for src/couchers/metrics.py: 100%

83 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-12-07 17:25 +0000

1import threading 

2from collections.abc import Callable 

3from datetime import timedelta 

4from typing import Any 

5 

6from opentelemetry import trace 

7from prometheus_client import ( 

8 CONTENT_TYPE_LATEST, 

9 CollectorRegistry, 

10 Counter, 

11 Gauge, 

12 Histogram, 

13 exposition, 

14 generate_latest, 

15 multiprocess, 

16) 

17from prometheus_client.registry import CollectorRegistry 

18from sqlalchemy.sql import distinct, func 

19from sqlalchemy.sql.selectable import Select 

20 

21from couchers.db import session_scope 

22from couchers.models import BackgroundJob, EventOccurrenceAttendee, HostingStatus, HostRequest, Message, Reference, User 

23from couchers.sql import couchers_select as select 

24 

# tracer used to wrap each query-backed gauge evaluation in a span
tracer = trace.get_tracer(__name__)

# dedicated registry; the multiprocess collector attaches itself to it
registry: CollectorRegistry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry=registry)  # type: ignore[no-untyped-call]

# explicit +Inf upper bound used as the last entry of the custom bucket tuples below
_INF: float = float("inf")

31 

# duration of each background job run, labelled by job type, final status, attempt number and exception name
jobs_duration_histogram: Histogram = Histogram(
    name="couchers_background_jobs_seconds",
    documentation="Durations of background jobs",
    labelnames=["job", "status", "attempt", "exception"],
)

37 

38 

def observe_in_jobs_duration_histogram(
    job_type: str, job_state: str, try_count: int, exception_name: str, duration_s: float
) -> None:
    """
    Records one observation of duration_s in jobs_duration_histogram

    The label values are, in order: job_type, job_state, the stringified try_count and exception_name.
    """
    labelled_histogram = jobs_duration_histogram.labels(job_type, job_state, str(try_count), exception_name)
    labelled_histogram.observe(duration_s)

43 

44 

# queue latency of background jobs; custom buckets range from 10 ms up to one hour, then +Inf
jobs_queued_histogram: Histogram = Histogram(
    name="couchers_background_jobs_queued_seconds",
    documentation="Time background job spent queued before being picked up",
    buckets=(0.01, 0.05, 0.1, 0.5, 1.0, 2.5, 5.0, 10, 20, 30, 40, 50, 60, 90, 120, 300, 600, 1800, 3600, _INF),
)

50 

51 

# processing time of gRPC calls, labelled by method, whether the caller was logged in, status code and exception
servicer_duration_histogram: Histogram = Histogram(
    name="couchers_servicer_duration_seconds",
    documentation="Durations of processing gRPC calls",
    labelnames=["method", "logged_in", "code", "exception"],
)

57 

58 

def observe_in_servicer_duration_histogram(
    method: str, user_id: Any, status_code: str, exception_type: str, duration_s: float
) -> None:
    """
    Records one observation of duration_s in servicer_duration_histogram

    The "logged_in" label is derived from user_id: it is True whenever user_id is not None.
    """
    logged_in = user_id is not None
    labelled_histogram = servicer_duration_histogram.labels(method, logged_in, status_code, exception_type)
    labelled_histogram.observe(duration_s)

63 

64 

# list of (gauge, function) pairs: each function is executed to compute the value its gauge is set to
# the python prometheus client does not support Gauge.set_function, so instead we hack around it and set each gauge just
# before collection with this
_set_hacky_gauges_funcs: list[tuple[Gauge, Callable[[], Any]]] = list()

69 

70 

def _make_gauge_from_query(name: str, description: str, statement: Select[Any]) -> Gauge:
    """
    Creates a gauge whose value comes from running a sqlalchemy statement

    statement should be a sqlalchemy SELECT statement that returns a single number

    The gauge is not set here: it is registered, together with a function that runs the statement, in
    _set_hacky_gauges_funcs. That function opens a tracing span named "metric.<name>" and a fresh session each time
    it is called.
    """

    def _run_query() -> Any:
        with tracer.start_as_current_span(f"metric.{name}"), session_scope() as session:
            return session.execute(statement).scalar_one()

    query_gauge = Gauge(name, description, multiprocess_mode="mostrecent")
    _set_hacky_gauges_funcs.append((query_gauge, _run_query))
    return query_gauge

86 

87 

# one gauge per look-back window: counts visible users whose last_active is newer than now() minus the window
active_users_gauges: list[Gauge] = [
    _make_gauge_from_query(
        name=f"couchers_active_users_{suffix}",
        description=f"Number of active users in the last {human_window}",
        statement=(
            select(func.count())
            .select_from(User)
            .where(User.is_visible)
            .where(User.last_active > func.now() - window)
        ),
    )
    for suffix, human_window, window in [
        ("5m", "5 min", timedelta(minutes=5)),
        ("24h", "24 hours", timedelta(hours=24)),
        ("1month", "1 month", timedelta(days=31)),
        ("3month", "3 months", timedelta(days=92)),
        ("6month", "6 months", timedelta(days=183)),
        ("12month", "12 months", timedelta(days=365)),
    ]
]

103 

# total and per-gender counts of visible users
users_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users",
    description="Total number of users",
    statement=select(func.count()).select_from(User).where(User.is_visible),
)

man_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_man",
    description="Total number of users with gender 'Man'",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.gender == "Man"),
)

woman_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_woman",
    description="Total number of users with gender 'Woman'",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.gender == "Woman"),
)

nonbinary_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_nonbinary",
    description="Total number of users with gender 'Non-binary'",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.gender == "Non-binary"),
)

125 

# counts of visible users per hosting status
can_host_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_can_host",
    description="Total number of users with hosting status 'can_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.can_host)
    ),
)

cant_host_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_cant_host",
    description="Total number of users with hosting status 'cant_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.cant_host)
    ),
)

maybe_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_maybe",
    description="Total number of users with hosting status 'maybe'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.maybe)
    ),
)

143 

# counts of visible users that have filled in their profile / their "my home" section
completed_profile_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_completed_profile",
    description="Total number of users with a completed profile",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.has_completed_profile),
)

completed_my_home_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_completed_my_home",
    description="Total number of users with a completed my home section",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.has_completed_my_home),
)

155 

# activity gauges: each counts the distinct user ids found in an activity table, joined to visible users only
sent_message_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_sent_message",
    description="Total number of users who have sent a message",
    statement=(
        select(func.count(distinct(Message.author_id)))
        .join(User, User.id == Message.author_id)
        .where(User.is_visible)
    ),
)

sent_request_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_sent_request",
    description="Total number of users who have sent a host request",
    statement=(
        select(func.count(distinct(HostRequest.surfer_user_id)))
        .join(User, User.id == HostRequest.surfer_user_id)
        .where(User.is_visible)
    ),
)

has_reference_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_has_reference",
    description="Total number of users who have a reference",
    statement=(
        select(func.count(distinct(Reference.to_user_id)))
        .join(User, User.id == Reference.to_user_id)
        .where(User.is_visible)
    ),
)

rsvpd_to_event_gauge: Gauge = _make_gauge_from_query(
    name="couchers_users_rsvpd_to_event",
    description="Total number of users who have RSVPd to an event",
    statement=(
        select(func.count(distinct(EventOccurrenceAttendee.user_id)))
        .join(User, User.id == EventOccurrenceAttendee.user_id)
        .where(User.is_visible)
    ),
)

191 

# number of BackgroundJob rows for which BackgroundJob.ready_for_retry holds
background_jobs_ready_to_execute_gauge: Gauge = _make_gauge_from_query(
    name="couchers_background_jobs_ready_to_execute",
    description="Total number of background jobs ready to execute",
    statement=select(func.count()).select_from(BackgroundJob).where(BackgroundJob.ready_for_retry),
)

197 

# unlabelled counters describing what happens when a background worker tries to pick up a job
background_jobs_serialization_errors_counter: Counter = Counter(
    name="couchers_background_jobs_serialization_errors_total",
    documentation="Number of times a bg worker has a serialization error",
)

background_jobs_no_jobs_counter: Counter = Counter(
    name="couchers_background_jobs_no_jobs_total",
    documentation="Number of times a bg worker tries to grab a job but there is none",
)

background_jobs_got_job_counter: Counter = Counter(
    name="couchers_background_jobs_got_job_total",
    documentation="Number of times a bg worker grabbed a job",
)

212 

213 

# signup funnel: initiations (unlabelled), completions and time-to-complete (both labelled by gender)
signup_initiations_counter: Counter = Counter(
    name="couchers_signup_initiations_total",
    documentation="Number of initiated signups",
)
signup_completions_counter: Counter = Counter(
    name="couchers_signup_completions_total",
    documentation="Number of completed signups",
    labelnames=["gender"],
)
signup_time_histogram: Histogram = Histogram(
    name="couchers_signup_time_seconds",
    documentation="Time taken for a user to sign up",
    labelnames=["gender"],
    # bucket bounds in seconds: 30 s up to 2 h, then +Inf
    buckets=(30, 60, 90, 120, 180, 240, 300, 360, 420, 480, 540, 600, 900, 1200, 1800, 3600, 7200, _INF),
)

229 

# logins, labelled by gender
logins_counter: Counter = Counter(
    name="couchers_logins_total",
    documentation="Number of logins",
    labelnames=["gender"],
)

# password reset funnel (unlabelled)
password_reset_initiations_counter: Counter = Counter(
    name="couchers_password_reset_initiations_total",
    documentation="Number of password reset initiations",
)
password_reset_completions_counter: Counter = Counter(
    name="couchers_password_reset_completions_total",
    documentation="Number of password reset completions",
)

244 

# account deletion funnel and account recoveries, all labelled by gender
account_deletion_initiations_counter: Counter = Counter(
    name="couchers_account_deletion_initiations_total",
    documentation="Number of account deletion initiations",
    labelnames=["gender"],
)
account_deletion_completions_counter: Counter = Counter(
    name="couchers_account_deletion_completions_total",
    documentation="Number of account deletion completions",
    labelnames=["gender"],
)
account_recoveries_counter: Counter = Counter(
    name="couchers_account_recoveries_total",
    documentation="Number of account recoveries",
    labelnames=["gender"],
)

260 

# strong verification: initiations and data deletions are labelled by gender, completions carry no labels
strong_verification_initiations_counter: Counter = Counter(
    name="couchers_strong_verification_initiations_total",
    documentation="Number of strong verification initiations",
    labelnames=["gender"],
)
strong_verification_completions_counter: Counter = Counter(
    name="couchers_strong_verification_completions_total",
    documentation="Number of strong verification completions",
)
strong_verification_data_deletions_counter: Counter = Counter(
    name="couchers_strong_verification_data_deletions_total",
    documentation="Number of strong verification data deletions",
    labelnames=["gender"],
)

275 

# host requests sent and the responses to them, labelled by the genders of both parties
host_requests_sent_counter: Counter = Counter(
    name="couchers_host_requests_total",
    documentation="Number of host requests sent",
    labelnames=["from_gender", "to_gender"],
)
host_request_responses_counter: Counter = Counter(
    name="couchers_host_requests_responses_total",
    documentation="Number of responses to host requests",
    labelnames=["responder_gender", "other_gender", "response_type"],
)

286 

# outbound communication counters: messages, push notification delivery attempts and emails
sent_messages_counter: Counter = Counter(
    name="couchers_sent_messages_total",
    documentation="Number of messages sent",
    labelnames=["gender", "message_type"],
)


push_notification_counter: Counter = Counter(
    name="couchers_push_notification_total",
    documentation="Number of push notification delivery attempts",
    labelnames=["platform", "outcome"],
)
emails_counter: Counter = Counter(
    name="couchers_emails_total",
    documentation="Number of emails sent",
)

303 

304 

# recaptcha assessments: how many were created and how their scores are distributed, both labelled by action
recaptchas_assessed_counter: Counter = Counter(
    name="couchers_recaptchas_assessed_total",
    documentation="Number of times a recaptcha assessment is created",
    labelnames=["action"],
)

recaptcha_score_histogram: Histogram = Histogram(
    name="couchers_recaptcha_score",
    documentation="Score of recaptcha assessments",
    labelnames=["action"],
    # 21 evenly spaced bounds: 0.0, 0.05, ..., 1.0
    buckets=tuple(step / 20 for step in range(21)),
)

317 

# time until the first response to a host request; bucket bounds are in seconds
# NOTE(review): the "week" buckets use 602_000 s, whereas an exact week is 604_800 s; the values are left as they are
# so that existing bucket boundaries do not change -- confirm whether the approximation is intentional
host_request_first_response_histogram: Histogram = Histogram(
    name="couchers_host_request_first_response_seconds",
    documentation="Response time to host requests",
    labelnames=["host_gender", "surfer_gender", "response_type"],
    buckets=(
        1 * 60,  # 1m
        2 * 60,  # 2m
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        5 * 86_400,  # 5d
        602_000,  # ~1w
        2 * 602_000,  # ~2w
        3 * 602_000,  # ~3w
        4 * 602_000,  # ~4w
        _INF,
    ),
)

# age of the sending account at the moment a host request is created; bucket bounds are in seconds
# NOTE(review): the "week" buckets use 602_000 s, whereas an exact week is 604_800 s; the values are left as they are
# so that existing bucket boundaries do not change -- confirm whether the approximation is intentional
account_age_on_host_request_create_histogram: Histogram = Histogram(
    name="couchers_account_age_on_host_request_create_histogram_seconds",
    documentation="Age of account sending a host request",
    labelnames=["surfer_gender", "host_gender"],
    buckets=(
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        4 * 86_400,  # 4d
        5 * 86_400,  # 5d
        6 * 86_400,  # 6d
        602_000,  # ~1w
        2 * 602_000,  # ~2w
        3 * 602_000,  # ~3w
        4 * 602_000,  # ~4w
        5 * 602_000,  # ~5w
        10 * 602_000,  # ~10w
        25 * 602_000,  # ~25w
        52 * 602_000,  # ~52w
        104 * 602_000,  # ~104w
        _INF,
    ),
)

378 

379 

def create_prometheus_server(port: int) -> Any:
    """
    custom start method to fix problem described in https://github.com/prometheus/client_python/issues/155

    Builds a threading WSGI server on the given port (empty host string) and runs its serve_forever loop in a daemon
    thread. Each request first sets every gauge registered in _set_hacky_gauges_funcs, then responds with the latest
    exposition of the module-level registry.

    Returns the server object created by exposition.make_server.
    """

    def app(environ: Any, start_response: Any) -> Any:
        # set hacky gauges: run each registered function and store its result just before collection
        for query_gauge, compute_value in _set_hacky_gauges_funcs:
            query_gauge.set(compute_value())

        payload = generate_latest(registry)
        response_headers = [("Content-type", CONTENT_TYPE_LATEST), ("Content-Length", str(len(payload)))]
        start_response("200 OK", response_headers)
        return [payload]

    httpd = exposition.make_server(  # type: ignore[attr-defined]
        "", port, app, exposition.ThreadingWSGIServer, handler_class=exposition._SilentHandler
    )
    # daemon thread, so it is not waited on at interpreter exit
    server_thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    server_thread.start()
    return httpd