Coverage for src/couchers/metrics.py: 100%

81 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-08-28 14:55 +0000

1import threading 

2from datetime import timedelta 

3 

4from opentelemetry import trace 

5from prometheus_client import ( 

6 CONTENT_TYPE_LATEST, 

7 CollectorRegistry, 

8 Counter, 

9 Gauge, 

10 Histogram, 

11 exposition, 

12 generate_latest, 

13 multiprocess, 

14) 

15from prometheus_client.registry import CollectorRegistry 

16from sqlalchemy.sql import distinct, func 

17 

18from couchers.db import session_scope 

19from couchers.models import BackgroundJob, EventOccurrenceAttendee, HostingStatus, HostRequest, Message, Reference, User 

20from couchers.sql import couchers_select as select 

21 

# Tracer used to wrap each query-backed gauge refresh in a span.
# NOTE: this rebinds the module-level name `trace`, so from here on it refers to the tracer rather than the imported
# opentelemetry module.
trace = trace.get_tracer(__name__)

# Registry that gets rendered by the metrics endpoint. The multiprocess collector is constructed with the registry
# purely for its side effect; the collector instance itself is not kept.
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)

# Upper bound used to close off explicitly listed histogram buckets
_INF = float("inf")

# Duration of background job runs, labelled by job type, resulting state, attempt number and exception name
jobs_duration_histogram = Histogram(
    name="couchers_background_jobs_seconds",
    documentation="Durations of background jobs",
    labelnames=["job", "status", "attempt", "exception"],
)

34 

35 

def observe_in_jobs_duration_histogram(job_type, job_state, try_count, exception_name, duration_s):
    """
    Records one observation of duration_s in jobs_duration_histogram

    Label values are given positionally as job type, job state, try count (converted to a string) and exception
    name.
    """
    attempt_label = str(try_count)
    series = jobs_duration_histogram.labels(job_type, job_state, attempt_label, exception_name)
    series.observe(duration_s)

38 

39 

# Time a background job spent queued before being picked up; unlabelled, with explicit buckets from 10 ms to 1 hour
jobs_queued_histogram = Histogram(
    name="couchers_background_jobs_queued_seconds",
    documentation="Time background job spent queued before being picked up",
    buckets=(0.01, 0.05, 0.1, 0.5, 1.0, 2.5, 5.0, 10, 20, 30, 40, 50, 60, 90, 120, 300, 600, 1800, 3600, _INF),
)

# Duration of processing gRPC calls, labelled by method, whether the caller was logged in, status code and exception
servicer_duration_histogram = Histogram(
    name="couchers_servicer_duration_seconds",
    documentation="Durations of processing gRPC calls",
    labelnames=["method", "logged_in", "code", "exception"],
)

52 

53 

def observe_in_servicer_duration_histogram(method, user_id, status_code, exception_type, duration_s):
    """
    Records one observation of duration_s in servicer_duration_histogram

    The second label value is a boolean that is True exactly when user_id is not None; the other label values are
    method, status_code and exception_type, passed through as given.
    """
    is_logged_in = user_id is not None
    series = servicer_duration_histogram.labels(method, is_logged_in, status_code, exception_type)
    series.observe(duration_s)

56 

57 

# list of (gauge, function) pairs, where calling the function returns the value the gauge should be set to
# Gauge.set_function cannot be used here (NOTE(review): per the prometheus_client docs it is unsupported in
# multiprocess mode, which this module uses -- confirm), so instead we hack around it and set each gauge just before
# collection by walking this list
_set_hacky_gauges_funcs = []

62 

63 

def _make_gauge_from_query(name, description, statement):
    """
    Builds a gauge whose value is obtained by running a database query

    statement must be a sqlalchemy SELECT statement that yields exactly one row with a single (numeric) column, as
    the result is read with scalar_one().

    The gauge is not given a value here. It is appended to _set_hacky_gauges_funcs together with a function that
    runs the query, so that the value can be refreshed later.

    Returns the newly created gauge.
    """

    def run_query():
        # every refresh runs inside its own span and its own database session
        with trace.start_as_current_span(f"metric.{name}"), session_scope() as session:
            result = session.execute(statement)
            return result.scalar_one()

    new_gauge = Gauge(name, description, multiprocess_mode="mostrecent")
    _set_hacky_gauges_funcs.append((new_gauge, run_query))
    return new_gauge

79 

80 

# One gauge per look-back window, counting visible users whose last_active is more recent than now minus the window.
# The month-based windows are fixed day counts (31, 92, 183 and 365 days), not calendar months.
active_users_gauges = [
    _make_gauge_from_query(
        name=f"couchers_active_users_{suffix}",
        description=f"Number of active users in the last {window_label}",
        statement=(
            select(func.count())
            .select_from(User)
            .where(User.is_visible)
            .where(User.last_active > func.now() - window)
        ),
    )
    for suffix, window_label, window in (
        ("5m", "5 min", timedelta(minutes=5)),
        ("24h", "24 hours", timedelta(hours=24)),
        ("1month", "1 month", timedelta(days=31)),
        ("3month", "3 months", timedelta(days=92)),
        ("6month", "6 months", timedelta(days=183)),
        ("12month", "12 months", timedelta(days=365)),
    )
]

96 

# Count of all visible users
users_gauge = _make_gauge_from_query(
    name="couchers_users",
    description="Total number of users",
    statement=select(func.count()).select_from(User).where(User.is_visible),
)

# Counts of visible users per exact gender string
man_gauge = _make_gauge_from_query(
    name="couchers_users_man",
    description="Total number of users with gender 'Man'",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.gender == "Man"),
)

woman_gauge = _make_gauge_from_query(
    name="couchers_users_woman",
    description="Total number of users with gender 'Woman'",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.gender == "Woman"),
)

nonbinary_gauge = _make_gauge_from_query(
    name="couchers_users_nonbinary",
    description="Total number of users with gender 'Non-binary'",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.gender == "Non-binary"),
)

118 

# Counts of visible users per hosting status
can_host_gauge = _make_gauge_from_query(
    name="couchers_users_can_host",
    description="Total number of users with hosting status 'can_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.can_host)
    ),
)

cant_host_gauge = _make_gauge_from_query(
    name="couchers_users_cant_host",
    description="Total number of users with hosting status 'cant_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.cant_host)
    ),
)

maybe_gauge = _make_gauge_from_query(
    name="couchers_users_maybe",
    description="Total number of users with hosting status 'maybe'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.maybe)
    ),
)

136 

# Counts of visible users for which the respective completeness flag on User holds
completed_profile_gauge = _make_gauge_from_query(
    name="couchers_users_completed_profile",
    description="Total number of users with a completed profile",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.has_completed_profile),
)

completed_my_home_gauge = _make_gauge_from_query(
    name="couchers_users_completed_my_home",
    description="Total number of users with a completed my home section",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.has_completed_my_home),
)

148 

# The gauges below count distinct user ids found in an activity table, joined to User so that only visible users are
# included.
sent_message_gauge = _make_gauge_from_query(
    name="couchers_users_sent_message",
    description="Total number of users who have sent a message",
    statement=(
        select(func.count(distinct(Message.author_id)))
        .join(User, User.id == Message.author_id)
        .where(User.is_visible)
    ),
)

sent_request_gauge = _make_gauge_from_query(
    name="couchers_users_sent_request",
    description="Total number of users who have sent a host request",
    statement=(
        select(func.count(distinct(HostRequest.surfer_user_id)))
        .join(User, User.id == HostRequest.surfer_user_id)
        .where(User.is_visible)
    ),
)

has_reference_gauge = _make_gauge_from_query(
    name="couchers_users_has_reference",
    description="Total number of users who have a reference",
    statement=(
        select(func.count(distinct(Reference.to_user_id)))
        .join(User, User.id == Reference.to_user_id)
        .where(User.is_visible)
    ),
)

rsvpd_to_event_gauge = _make_gauge_from_query(
    name="couchers_users_rsvpd_to_event",
    description="Total number of users who have RSVPd to an event",
    statement=(
        select(func.count(distinct(EventOccurrenceAttendee.user_id)))
        .join(User, User.id == EventOccurrenceAttendee.user_id)
        .where(User.is_visible)
    ),
)

184 

# Count of background jobs for which BackgroundJob.ready_for_retry holds
background_jobs_ready_to_execute_gauge = _make_gauge_from_query(
    name="couchers_background_jobs_ready_to_execute",
    description="Total number of background jobs ready to execute",
    statement=select(func.count()).select_from(BackgroundJob).where(BackgroundJob.ready_for_retry),
)

190 

# Unlabelled counters describing what happens when a background worker tries to pick up a job
background_jobs_serialization_errors_counter = Counter(
    name="couchers_background_jobs_serialization_errors_total",
    documentation="Number of times a bg worker has a serialization error",
)

background_jobs_no_jobs_counter = Counter(
    name="couchers_background_jobs_no_jobs_total",
    documentation="Number of times a bg worker tries to grab a job but there is none",
)

background_jobs_got_job_counter = Counter(
    name="couchers_background_jobs_got_job_total",
    documentation="Number of times a bg worker grabbed a job",
)

205 

206 

# Signup funnel: initiations are unlabelled, completions and time-to-complete are labelled by gender
signup_initiations_counter = Counter(
    name="couchers_signup_initiations_total",
    documentation="Number of initiated signups",
)
signup_completions_counter = Counter(
    name="couchers_signup_completions_total",
    documentation="Number of completed signups",
    labelnames=["gender"],
)
# explicit buckets from 30 seconds up to 2 hours
signup_time_histogram = Histogram(
    name="couchers_signup_time_seconds",
    documentation="Time taken for a user to sign up",
    labelnames=["gender"],
    buckets=(30, 60, 90, 120, 180, 240, 300, 360, 420, 480, 540, 600, 900, 1200, 1800, 3600, 7200, _INF),
)

222 

# Logins, labelled by gender
logins_counter = Counter(
    name="couchers_logins_total",
    documentation="Number of logins",
    labelnames=["gender"],
)

# Password reset flow; both counters are unlabelled
password_reset_initiations_counter = Counter(
    name="couchers_password_reset_initiations_total",
    documentation="Number of password reset initiations",
)
password_reset_completions_counter = Counter(
    name="couchers_password_reset_completions_total",
    documentation="Number of password reset completions",
)

237 

# Account deletion and recovery; all three counters are labelled by gender
account_deletion_initiations_counter = Counter(
    name="couchers_account_deletion_initiations_total",
    documentation="Number of account deletion initiations",
    labelnames=["gender"],
)
account_deletion_completions_counter = Counter(
    name="couchers_account_deletion_completions_total",
    documentation="Number of account deletion completions",
    labelnames=["gender"],
)
account_recoveries_counter = Counter(
    name="couchers_account_recoveries_total",
    documentation="Number of account recoveries",
    labelnames=["gender"],
)

253 

# Strong verification flow. Initiations and data deletions are labelled by gender, completions are not.
# NOTE(review): the missing gender label on completions looks inconsistent with its siblings -- confirm whether it is
# intentional before relying on per-gender completion rates.
strong_verification_initiations_counter = Counter(
    name="couchers_strong_verification_initiations_total",
    documentation="Number of strong verification initiations",
    labelnames=["gender"],
)
strong_verification_completions_counter = Counter(
    name="couchers_strong_verification_completions_total",
    documentation="Number of strong verification completions",
)
strong_verification_data_deletions_counter = Counter(
    name="couchers_strong_verification_data_deletions_total",
    documentation="Number of strong verification data deletions",
    labelnames=["gender"],
)

268 

# Host requests sent, labelled by the genders of both parties
host_requests_sent_counter = Counter(
    name="couchers_host_requests_total",
    documentation="Number of host requests sent",
    labelnames=["from_gender", "to_gender"],
)
# Responses to host requests, labelled by the genders of both parties and the type of response
host_request_responses_counter = Counter(
    name="couchers_host_requests_responses_total",
    documentation="Number of responses to host requests",
    labelnames=["responder_gender", "other_gender", "response_type"],
)

# Messages sent, labelled by gender and message type
sent_messages_counter = Counter(
    name="couchers_sent_messages_total",
    documentation="Number of messages sent",
    labelnames=["gender", "message_type"],
)

285 

286 

# Outgoing notifications; all three counters are unlabelled
push_notification_counter = Counter(
    name="couchers_push_notification_total",
    documentation="Number of push notifications sent",
)
push_notification_disabled_counter = Counter(
    name="couchers_push_notification_disabled_total",
    documentation="Number of push notifications that were disabled due to failure to send",
)
emails_counter = Counter(
    name="couchers_emails_total",
    documentation="Number of emails sent",
)

299 

300 

# Recaptcha assessments, labelled by action
recaptchas_assessed_counter = Counter(
    name="couchers_recaptchas_assessed_total",
    documentation="Number of times a recaptcha assessment is created",
    labelnames=["action"],
)

# Distribution of recaptcha scores; buckets are 0.0, 0.05, ..., 1.0 (21 bounds in steps of 1/20)
recaptcha_score_histogram = Histogram(
    name="couchers_recaptcha_score",
    documentation="Score of recaptcha assessments",
    labelnames=["action"],
    buckets=tuple(step / 20 for step in range(21)),
)

313 

# Time until the first response to a host request, labelled by the genders of host and surfer and the response type.
# Bucket bounds are in seconds.
# NOTE(review): the "week" buckets are multiples of 602_000 s, whereas an exact week is 604_800 s. The values are
# kept as they are because changing them changes the exported bucket boundaries -- confirm whether this is intended.
host_request_first_response_histogram = Histogram(
    name="couchers_host_request_first_response_seconds",
    documentation="Response time to host requests",
    labelnames=["host_gender", "surfer_gender", "response_type"],
    buckets=(
        1 * 60,  # 1m
        2 * 60,  # 2m
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        1 * 3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        1 * 86_400,  # 24h
        2 * 86_400,  # 2d
        5 * 86_400,  # 5d
        1 * 602_000,  # ~1w (see note above)
        2 * 602_000,  # ~2w
        3 * 602_000,  # ~3w
        4 * 602_000,  # ~4w
        _INF,
    ),
)

# Age of the sending account at the time a host request is created, labelled by the genders of surfer and host.
# Bucket bounds are in seconds.
# NOTE(review): the "week" buckets are multiples of 602_000 s, whereas an exact week is 604_800 s. The values are
# kept as they are because changing them changes the exported bucket boundaries -- confirm whether this is intended.
account_age_on_host_request_create_histogram = Histogram(
    name="couchers_account_age_on_host_request_create_histogram_seconds",
    documentation="Age of account sending a host request",
    labelnames=["surfer_gender", "host_gender"],
    buckets=(
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        1 * 3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        1 * 86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        4 * 86_400,  # 4d
        5 * 86_400,  # 5d
        6 * 86_400,  # 6d
        1 * 602_000,  # ~1w (see note above)
        2 * 602_000,  # ~2w
        3 * 602_000,  # ~3w
        4 * 602_000,  # ~4w
        5 * 602_000,  # ~5w
        10 * 602_000,  # ~10w
        25 * 602_000,  # ~25w
        52 * 602_000,  # ~52w
        104 * 602_000,  # ~104w
        _INF,
    ),
)

374 

375 

def create_prometheus_server(port):
    """
    Custom start method to fix the problem described in https://github.com/prometheus/client_python/issues/155

    Creates a threading WSGI server on the given port (with an empty host string) and serves it from a daemon
    thread. On every request, each gauge in _set_hacky_gauges_funcs is first set to the value returned by its
    function, then the contents of the module's registry are rendered and returned with a 200 response.

    Returns the server object; the serving thread has already been started when this returns.
    """

    def metrics_app(environ, start_response):
        # refresh the hacky gauges so that the rendered output contains current values
        for hacky_gauge, compute_value in _set_hacky_gauges_funcs:
            hacky_gauge.set(compute_value())

        payload = generate_latest(registry)
        headers = [
            ("Content-type", CONTENT_TYPE_LATEST),
            ("Content-Length", str(len(payload))),
        ]
        start_response("200 OK", headers)
        return [payload]

    server = exposition.make_server(
        "", port, metrics_app, exposition.ThreadingWSGIServer, handler_class=exposition._SilentHandler
    )
    # daemon thread, so it does not keep the process alive on its own
    server_thread = threading.Thread(target=server.serve_forever, daemon=True)
    server_thread.start()
    return server