Coverage for src/couchers/metrics.py: 100%

80 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-07-05 23:21 +0000

1import threading 

2from datetime import timedelta 

3 

4from opentelemetry import trace 

5from prometheus_client import ( 

6 CONTENT_TYPE_LATEST, 

7 CollectorRegistry, 

8 Counter, 

9 Gauge, 

10 Histogram, 

11 exposition, 

12 generate_latest, 

13 multiprocess, 

14) 

15from prometheus_client.registry import CollectorRegistry 

16from sqlalchemy.sql import distinct, func 

17 

18from couchers.db import session_scope 

19from couchers.models import BackgroundJob, EventOccurrenceAttendee, HostingStatus, HostRequest, Message, Reference, User 

20from couchers.sql import couchers_select as select 

21 

# NOTE: this rebinds the module-level name ``trace`` from the imported
# ``opentelemetry.trace`` module to the tracer obtained for this module.
trace = trace.get_tracer(__name__)

# Dedicated registry with the multiprocess collector attached to it; this is
# the registry that ``create_prometheus_server`` serialises on each request.
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)

# Used as the explicit last entry of the custom bucket tuples in this module.
_INF = float("inf")

# Background job durations, labelled by job, status, attempt and exception.
jobs_duration_histogram = Histogram(
    name="couchers_background_jobs_seconds",
    documentation="Durations of background jobs",
    labelnames=["job", "status", "attempt", "exception"],
)

34 

35 

def observe_in_jobs_duration_histogram(job_type, job_state, try_count, exception_name, duration_s):
    """
    Records one observation of ``duration_s`` in ``jobs_duration_histogram``

    The label values are ``job_type``, ``job_state``, ``try_count`` (converted with ``str``) and ``exception_name``,
    passed positionally in that order.
    """
    labelled_histogram = jobs_duration_histogram.labels(job_type, job_state, str(try_count), exception_name)
    labelled_histogram.observe(duration_s)

38 

39 

# Queue latency of background jobs; custom buckets from 10 ms up to one hour.
jobs_queued_histogram = Histogram(
    name="couchers_background_jobs_queued_seconds",
    documentation="Time background job spent queued before being picked up",
    buckets=(
        0.01,
        0.05,
        0.1,
        0.5,
        1.0,
        2.5,
        5.0,
        10,
        20,
        30,
        40,
        50,
        60,
        90,
        120,
        300,
        600,
        1800,
        3600,
        _INF,
    ),
)

# gRPC call processing time, labelled by method, logged_in, code and exception.
servicer_duration_histogram = Histogram(
    name="couchers_servicer_duration_seconds",
    documentation="Durations of processing gRPC calls",
    labelnames=["method", "logged_in", "code", "exception"],
)

52 

53 

def observe_in_servicer_duration_histogram(method, user_id, status_code, exception_type, duration_s):
    """
    Records one observation of ``duration_s`` in ``servicer_duration_histogram``

    The ``logged_in`` label value is the boolean ``user_id is not None``; ``method``, ``status_code`` and
    ``exception_type`` are passed through unchanged as the remaining label values.
    """
    is_logged_in = user_id is not None
    labelled_histogram = servicer_duration_histogram.labels(method, is_logged_in, status_code, exception_type)
    labelled_histogram.observe(duration_s)

56 

57 

# (gauge, function) pairs: calling the function yields the value the gauge should be set to.
# The original author notes that the python prometheus client does not support Gauge.set_function here, so as a
# workaround every gauge in this list is set explicitly just before collection.
_set_hacky_gauges_funcs = []


def _make_gauge_from_query(name, description, statement):
    """
    Creates a gauge whose value is produced by running a sqlalchemy statement

    statement should be a sqlalchemy SELECT statement that returns a single number

    The gauge is created with ``multiprocess_mode="mostrecent"`` and appended, together with a function that executes
    the statement, to ``_set_hacky_gauges_funcs``. The statement is not executed by this function itself.

    Returns the newly created gauge.
    """

    def _fetch_value():
        # every evaluation runs inside its own tracing span and its own database session
        with trace.start_as_current_span(f"metric.{name}"), session_scope() as session:
            result = session.execute(statement)
            return result.scalar_one()

    query_gauge = Gauge(name, description, multiprocess_mode="mostrecent")
    _set_hacky_gauges_funcs.append((query_gauge, _fetch_value))
    return query_gauge

79 

80 

# One gauge per look-back window: counts visible users whose last_active falls within the window.
active_users_gauges = [
    _make_gauge_from_query(
        name=f"couchers_active_users_{suffix}",
        description=f"Number of active users in the last {label}",
        statement=(
            select(func.count())
            .select_from(User)
            .where(User.is_visible)
            .where(User.last_active > func.now() - window)
        ),
    )
    for suffix, label, window in (
        ("5m", "5 min", timedelta(minutes=5)),
        ("24h", "24 hours", timedelta(hours=24)),
        ("1month", "1 month", timedelta(days=31)),
        ("3month", "3 months", timedelta(days=92)),
        ("6month", "6 months", timedelta(days=183)),
        ("12month", "12 months", timedelta(days=365)),
    )
]

96 

# Count of all visible users.
users_gauge = _make_gauge_from_query(
    name="couchers_users",
    description="Total number of users",
    statement=select(func.count()).select_from(User).where(User.is_visible),
)

# Counts of visible users per gender value.
man_gauge = _make_gauge_from_query(
    name="couchers_users_man",
    description="Total number of users with gender 'Man'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Man")
    ),
)

woman_gauge = _make_gauge_from_query(
    name="couchers_users_woman",
    description="Total number of users with gender 'Woman'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Woman")
    ),
)

nonbinary_gauge = _make_gauge_from_query(
    name="couchers_users_nonbinary",
    description="Total number of users with gender 'Non-binary'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.gender == "Non-binary")
    ),
)

118 

# Counts of visible users per hosting status.
can_host_gauge = _make_gauge_from_query(
    name="couchers_users_can_host",
    description="Total number of users with hosting status 'can_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.can_host)
    ),
)

cant_host_gauge = _make_gauge_from_query(
    name="couchers_users_cant_host",
    description="Total number of users with hosting status 'cant_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.cant_host)
    ),
)

maybe_gauge = _make_gauge_from_query(
    name="couchers_users_maybe",
    description="Total number of users with hosting status 'maybe'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.maybe)
    ),
)

136 

# Count of visible users with a completed profile.
completed_profile_gauge = _make_gauge_from_query(
    name="couchers_users_completed_profile",
    description="Total number of users with a completed profile",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.has_completed_profile)
    ),
)

# The gauges below count distinct user ids from an activity table, joined to User and restricted to visible users.
sent_message_gauge = _make_gauge_from_query(
    name="couchers_users_sent_message",
    description="Total number of users who have sent a message",
    statement=(
        select(func.count(distinct(Message.author_id)))
        .join(User, User.id == Message.author_id)
        .where(User.is_visible)
    ),
)

sent_request_gauge = _make_gauge_from_query(
    name="couchers_users_sent_request",
    description="Total number of users who have sent a host request",
    statement=(
        select(func.count(distinct(HostRequest.surfer_user_id)))
        .join(User, User.id == HostRequest.surfer_user_id)
        .where(User.is_visible)
    ),
)

has_reference_gauge = _make_gauge_from_query(
    name="couchers_users_has_reference",
    description="Total number of users who have a reference",
    statement=(
        select(func.count(distinct(Reference.to_user_id)))
        .join(User, User.id == Reference.to_user_id)
        .where(User.is_visible)
    ),
)

rsvpd_to_event_gauge = _make_gauge_from_query(
    name="couchers_users_rsvpd_to_event",
    description="Total number of users who have RSVPd to an event",
    statement=(
        select(func.count(distinct(EventOccurrenceAttendee.user_id)))
        .join(User, User.id == EventOccurrenceAttendee.user_id)
        .where(User.is_visible)
    ),
)

178 

# Count of BackgroundJob rows matching BackgroundJob.ready_for_retry.
background_jobs_ready_to_execute_gauge = _make_gauge_from_query(
    name="couchers_background_jobs_ready_to_execute",
    description="Total number of background jobs ready to execute",
    statement=(
        select(func.count())
        .select_from(BackgroundJob)
        .where(BackgroundJob.ready_for_retry)
    ),
)

# Unlabelled counters describing background worker activity.
background_jobs_serialization_errors_counter = Counter(
    name="couchers_background_jobs_serialization_errors_total",
    documentation="Number of times a bg worker has a serialization error",
)

background_jobs_no_jobs_counter = Counter(
    name="couchers_background_jobs_no_jobs_total",
    documentation="Number of times a bg worker tries to grab a job but there is none",
)

background_jobs_got_job_counter = Counter(
    name="couchers_background_jobs_got_job_total",
    documentation="Number of times a bg worker grabbed a job",
)

199 

200 

# Signup funnel: initiations are unlabelled, completions and time-to-signup carry a gender label.
signup_initiations_counter = Counter(
    name="couchers_signup_initiations_total",
    documentation="Number of initiated signups",
)
signup_completions_counter = Counter(
    name="couchers_signup_completions_total",
    documentation="Number of completed signups",
    labelnames=["gender"],
)
signup_time_histogram = Histogram(
    name="couchers_signup_time_seconds",
    documentation="Time taken for a user to sign up",
    labelnames=["gender"],
    # custom buckets from 30 seconds up to 2 hours
    buckets=(
        30,
        60,
        90,
        120,
        180,
        240,
        300,
        360,
        420,
        480,
        540,
        600,
        900,
        1200,
        1800,
        3600,
        7200,
        _INF,
    ),
)

logins_counter = Counter(
    name="couchers_logins_total",
    documentation="Number of logins",
    labelnames=["gender"],
)

222 

# Password reset counters (unlabelled).
password_reset_initiations_counter = Counter(
    name="couchers_password_reset_initiations_total",
    documentation="Number of password reset initiations",
)
password_reset_completions_counter = Counter(
    name="couchers_password_reset_completions_total",
    documentation="Number of password reset completions",
)

# Account deletion and recovery counters, labelled by gender.
account_deletion_initiations_counter = Counter(
    name="couchers_account_deletion_initiations_total",
    documentation="Number of account deletion initiations",
    labelnames=["gender"],
)
account_deletion_completions_counter = Counter(
    name="couchers_account_deletion_completions_total",
    documentation="Number of account deletion completions",
    labelnames=["gender"],
)
account_recoveries_counter = Counter(
    name="couchers_account_recoveries_total",
    documentation="Number of account recoveries",
    labelnames=["gender"],
)

# Strong verification counters; note that the completions counter has no labels, unlike its two siblings.
strong_verification_initiations_counter = Counter(
    name="couchers_strong_verification_initiations_total",
    documentation="Number of strong verification initiations",
    labelnames=["gender"],
)
strong_verification_completions_counter = Counter(
    name="couchers_strong_verification_completions_total",
    documentation="Number of strong verification completions",
)
strong_verification_data_deletions_counter = Counter(
    name="couchers_strong_verification_data_deletions_total",
    documentation="Number of strong verification data deletions",
    labelnames=["gender"],
)

262 

# Host request counters, labelled by the genders of the two parties (and the response type for responses).
host_requests_sent_counter = Counter(
    name="couchers_host_requests_total",
    documentation="Number of host requests sent",
    labelnames=["from_gender", "to_gender"],
)
host_request_responses_counter = Counter(
    name="couchers_host_requests_responses_total",
    documentation="Number of responses to host requests",
    labelnames=["responder_gender", "other_gender", "response_type"],
)

# Messages sent, labelled by gender and message type.
sent_messages_counter = Counter(
    name="couchers_sent_messages_total",
    documentation="Number of messages sent",
    labelnames=["gender", "message_type"],
)

# Outbound notification counters (unlabelled).
push_notification_counter = Counter(
    name="couchers_push_notification_total",
    documentation="Number of push notifications sent",
)
push_notification_disabled_counter = Counter(
    name="couchers_push_notification_disabled_total",
    documentation="Number of push notifications that were disabled due to failure to send",
)
emails_counter = Counter(
    name="couchers_emails_total",
    documentation="Number of emails sent",
)

293 

294 

# Recaptcha assessments created, labelled by action.
recaptchas_assessed_counter = Counter(
    name="couchers_recaptchas_assessed_total",
    documentation="Number of times a recaptcha assessment is created",
    labelnames=["action"],
)

# Recaptcha score distribution, labelled by action.
recaptcha_score_histogram = Histogram(
    name="couchers_recaptcha_score",
    documentation="Score of recaptcha assessments",
    labelnames=["action"],
    # 21 bucket bounds: 0/20, 1/20, ..., 20/20
    buckets=tuple(step / 20 for step in range(21)),
)

307 

# NOTE: the week-based bucket bounds below use 604_800 seconds (7 * 86_400) per week. They previously used
# 602_000, which is not a whole number of days. Changing a bound changes the exported ``le`` label values, so
# dashboards or alerts that pin specific ``le`` values for these two histograms need to be checked.

# Time until the first response to a host request, labelled by host gender, surfer gender and response type.
host_request_first_response_histogram = Histogram(
    "couchers_host_request_first_response_seconds",
    "Response time to host requests",
    labelnames=["host_gender", "surfer_gender", "response_type"],
    buckets=(
        1 * 60,  # 1m
        2 * 60,  # 2m
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        # NOTE(review): this bound was labelled "4d" but the value is five days; the value is kept as is,
        # confirm whether 4 * 86_400 was intended
        5 * 86_400,  # 5d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        _INF,
    ),
)

# Age of the account sending a host request, labelled by surfer gender and host gender.
account_age_on_host_request_create_histogram = Histogram(
    "couchers_account_age_on_host_request_create_histogram_seconds",
    "Age of account sending a host request",
    labelnames=["surfer_gender", "host_gender"],
    buckets=(
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        4 * 86_400,  # 4d
        5 * 86_400,  # 5d
        6 * 86_400,  # 6d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        5 * 604_800,  # 5w
        10 * 604_800,  # 10w
        25 * 604_800,  # 25w
        52 * 604_800,  # 52w
        104 * 604_800,  # 104w
        _INF,
    ),
)

368 

369 

370def create_prometheus_server(port): 

371 """custom start method to fix problem descrbied in https://github.com/prometheus/client_python/issues/155""" 

372 

373 def app(environ, start_response): 

374 # set hacky gauges 

375 for gauge, f in _set_hacky_gauges_funcs: 

376 gauge.set(f()) 

377 

378 data = generate_latest(registry) 

379 start_response("200 OK", [("Content-type", CONTENT_TYPE_LATEST), ("Content-Length", str(len(data)))]) 

380 return [data] 

381 

382 httpd = exposition.make_server( 

383 "", port, app, exposition.ThreadingWSGIServer, handler_class=exposition._SilentHandler 

384 ) 

385 t = threading.Thread(target=httpd.serve_forever) 

386 t.daemon = True 

387 t.start() 

388 return httpd