Coverage for src/couchers/metrics.py: 100%

72 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-03-24 14:08 +0000

1import threading 

2from datetime import timedelta 

3 

4from prometheus_client import ( 

5 CONTENT_TYPE_LATEST, 

6 CollectorRegistry, 

7 Counter, 

8 Gauge, 

9 Histogram, 

10 exposition, 

11 generate_latest, 

12 multiprocess, 

13) 

14from prometheus_client.registry import CollectorRegistry 

15from sqlalchemy.sql import func 

16 

17from couchers.db import session_scope 

18from couchers.models import BackgroundJob, EventOccurrenceAttendee, HostingStatus, HostRequest, Message, Reference, User 

19from couchers.sql import couchers_select as select 

20 

# Registry that is rendered by the metrics endpoint; the multiprocess collector is attached to it
# so that samples recorded by the different processes are collected together
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)

# positive infinity, used as the last (catch-all) bucket bound of the histograms in this module
_INF = float("inf")

# duration of background job runs, labelled by job type, final state, attempt number and exception name
jobs_duration_histogram = Histogram(
    name="couchers_background_jobs_seconds",
    documentation="Durations of background jobs",
    labelnames=["job", "status", "attempt", "exception"],
)

31 

32 

def observe_in_jobs_duration_histogram(job_type, job_state, try_count, exception_name, duration_s):
    """
    Records one background job run of duration_s seconds in jobs_duration_histogram

    The label values are, in order: job type, job state, attempt number (stringified) and exception name
    """
    label_values = (job_type, job_state, str(try_count), exception_name)
    jobs_duration_histogram.labels(*label_values).observe(duration_s)

35 

36 

# how long a background job waited in the queue before a worker picked it up
jobs_queued_histogram = Histogram(
    name="couchers_background_jobs_queued_seconds",
    documentation="Time background job spent queued before being picked up",
    buckets=(
        0.01,
        0.05,
        0.1,
        0.5,
        1.0,
        2.5,
        5.0,
        10,
        20,
        30,
        40,
        50,
        60,
        90,
        120,
        300,
        600,
        1800,
        3600,
        _INF,
    ),
)


# processing time of gRPC calls, labelled by method, login state, status code and exception
servicer_duration_histogram = Histogram(
    name="couchers_servicer_duration_seconds",
    documentation="Durations of processing gRPC calls",
    labelnames=["method", "logged_in", "code", "exception"],
)

49 

50 

def observe_in_servicer_duration_histogram(method, user_id, status_code, exception_type, duration_s):
    """
    Records one gRPC call of duration_s seconds in servicer_duration_histogram

    The user id itself is never used as a label, only whether one was present (the "logged_in" label)
    """
    logged_in = user_id is not None
    servicer_duration_histogram.labels(method, logged_in, status_code, exception_type).observe(duration_s)

53 

54 

# list of (gauge, function) pairs, where the function computes the value the gauge should be set to
# Gauge.set_function is not usable for these gauges (NOTE(review): presumably because of the multiprocess
# collector used in this module - confirm), so instead we hack around it and set each gauge just before
# collection by walking this list
_set_hacky_gauges_funcs = []


def _make_gauge_from_query(name, description, statement):
    """
    Creates a gauge whose value comes from running a sqlalchemy statement

    statement should be a sqlalchemy SELECT statement that returns a single number

    The gauge is registered in _set_hacky_gauges_funcs together with a function that runs the statement in a
    fresh session, and is then returned
    """

    def run_query():
        with session_scope() as session:
            result = session.execute(statement)
            return result.scalar_one()

    new_gauge = Gauge(name, description, multiprocess_mode="mostrecent")
    _set_hacky_gauges_funcs.append((new_gauge, run_query))
    return new_gauge

75 

76 

# one gauge per look-back window, each counting visible users whose last_active lies within that window
active_users_gauges = [
    _make_gauge_from_query(
        f"couchers_active_users_{suffix}",
        f"Number of active users in the last {label}",
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.last_active > func.now() - window),
    )
    for suffix, label, window in (
        ("5m", "5 min", timedelta(minutes=5)),
        ("24h", "24 hours", timedelta(hours=24)),
        ("1month", "1 month", timedelta(days=31)),
        ("3month", "3 months", timedelta(days=92)),
        ("6month", "6 months", timedelta(days=183)),
        ("12month", "12 months", timedelta(days=365)),
    )
]

92 

# all gauges below count visible users, optionally narrowed by one extra condition

users_gauge = _make_gauge_from_query(
    "couchers_users",
    "Total number of users",
    select(func.count()).select_from(User).where(User.is_visible),
)

# --- by gender ---

man_gauge = _make_gauge_from_query(
    "couchers_users_man",
    "Total number of users with gender 'Man'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.gender == "Man"),
)

woman_gauge = _make_gauge_from_query(
    "couchers_users_woman",
    "Total number of users with gender 'Woman'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.gender == "Woman"),
)

nonbinary_gauge = _make_gauge_from_query(
    "couchers_users_nonbinary",
    "Total number of users with gender 'Non-binary'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.gender == "Non-binary"),
)

# --- by hosting status ---

can_host_gauge = _make_gauge_from_query(
    "couchers_users_can_host",
    "Total number of users with hosting status 'can_host'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.hosting_status == HostingStatus.can_host),
)

cant_host_gauge = _make_gauge_from_query(
    "couchers_users_cant_host",
    "Total number of users with hosting status 'cant_host'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.hosting_status == HostingStatus.cant_host),
)

maybe_gauge = _make_gauge_from_query(
    "couchers_users_maybe",
    "Total number of users with hosting status 'maybe'",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.hosting_status == HostingStatus.maybe),
)

# --- by profile completeness ---

completed_profile_gauge = _make_gauge_from_query(
    "couchers_users_completed_profile",
    "Total number of users with a completed profile",
    select(func.count())
    .select_from(User)
    .where(User.is_visible)
    .where(User.has_completed_profile),
)

138 

# The gauges below count visible users that have at least one related row in another table: the inner
# query joins User to that table and groups by user id (one row per user), the outer query counts the rows

sent_message_gauge = _make_gauge_from_query(
    "couchers_users_sent_message",
    "Total number of users who have sent a message",
    select(func.count()).select_from(
        select(User.id)
        .where(User.is_visible)
        .join(Message, Message.author_id == User.id)
        .group_by(User.id)
        .subquery()
    ),
)

sent_request_gauge = _make_gauge_from_query(
    "couchers_users_sent_request",
    "Total number of users who have sent a host request",
    select(func.count()).select_from(
        select(User.id)
        .where(User.is_visible)
        .join(HostRequest, HostRequest.surfer_user_id == User.id)
        .group_by(User.id)
        .subquery()
    ),
)

has_reference_gauge = _make_gauge_from_query(
    "couchers_users_has_reference",
    "Total number of users who have a reference",
    select(func.count()).select_from(
        select(User.id)
        .where(User.is_visible)
        .join(Reference, Reference.to_user_id == User.id)
        .group_by(User.id)
        .subquery()
    ),
)

rsvpd_to_event_gauge = _make_gauge_from_query(
    "couchers_users_rsvpd_to_event",
    "Total number of users who have RSVPd to an event",
    select(func.count()).select_from(
        select(User.id)
        .where(User.is_visible)
        .join(EventOccurrenceAttendee, EventOccurrenceAttendee.user_id == User.id)
        .group_by(User.id)
        .subquery()
    ),
)

194 

# number of BackgroundJob rows for which ready_for_retry holds
background_jobs_ready_to_execute_gauge = _make_gauge_from_query(
    "couchers_background_jobs_ready_to_execute",
    "Total number of background jobs ready to execute",
    select(func.count()).select_from(BackgroundJob).where(BackgroundJob.ready_for_retry),
)

# outcomes of a background worker trying to grab a job

background_jobs_serialization_errors_counter = Counter(
    name="couchers_background_jobs_serialization_errors_total",
    documentation="Number of times a bg worker has a serialization error",
)

background_jobs_no_jobs_counter = Counter(
    name="couchers_background_jobs_no_jobs_total",
    documentation="Number of times a bg worker tries to grab a job but there is none",
)

background_jobs_got_job_counter = Counter(
    name="couchers_background_jobs_got_job_total",
    documentation="Number of times a bg worker grabbed a job",
)

215 

216 

# --- signup funnel ---

signup_initiations_counter = Counter(
    name="couchers_signup_initiations_total",
    documentation="Number of initiated signups",
)

signup_completions_counter = Counter(
    name="couchers_signup_completions_total",
    documentation="Number of completed signups",
    labelnames=["gender"],
)

# bucket bounds are in seconds and range from 30 seconds to 2 hours
signup_time_histogram = Histogram(
    name="couchers_signup_time_seconds",
    documentation="Time taken for a user to sign up",
    labelnames=["gender"],
    buckets=(
        30,
        60,
        90,
        120,
        180,
        240,
        300,
        360,
        420,
        480,
        540,
        600,
        900,
        1200,
        1800,
        3600,
        7200,
        _INF,
    ),
)

# --- logins ---

logins_counter = Counter(
    name="couchers_logins_total",
    documentation="Number of logins",
    labelnames=["gender"],
)

238 

# --- password resets ---

password_reset_initiations_counter = Counter(
    name="couchers_password_reset_initiations_total",
    documentation="Number of password reset initiations",
)

password_reset_completions_counter = Counter(
    name="couchers_password_reset_completions_total",
    documentation="Number of password reset completions",
)

# --- account deletion and recovery ---

account_deletion_initiations_counter = Counter(
    name="couchers_account_deletion_initiations_total",
    documentation="Number of account deletion initiations",
    labelnames=["gender"],
)

account_deletion_completions_counter = Counter(
    name="couchers_account_deletion_completions_total",
    documentation="Number of account deletion completions",
    labelnames=["gender"],
)

account_recoveries_counter = Counter(
    name="couchers_account_recoveries_total",
    documentation="Number of account recoveries",
    labelnames=["gender"],
)

# --- strong verification ---

strong_verification_initiations_counter = Counter(
    name="couchers_strong_verification_initiations_total",
    documentation="Number of strong verification initiations",
    labelnames=["gender"],
)

# NOTE(review): unlike the initiations and data deletions counters this one has no "gender" label - confirm
# that this is intentional
strong_verification_completions_counter = Counter(
    name="couchers_strong_verification_completions_total",
    documentation="Number of strong verification completions",
)

strong_verification_data_deletions_counter = Counter(
    name="couchers_strong_verification_data_deletions_total",
    documentation="Number of strong verification data deletions",
    labelnames=["gender"],
)

278 

# --- host requests ---

host_requests_sent_counter = Counter(
    name="couchers_host_requests_total",
    documentation="Number of host requests sent",
    labelnames=["from_gender", "to_gender"],
)

host_request_responses_counter = Counter(
    name="couchers_host_requests_responses_total",
    documentation="Number of responses to host requests",
    labelnames=["responder_gender", "other_gender", "response_type"],
)

# --- messaging ---

sent_messages_counter = Counter(
    name="couchers_sent_messages_total",
    documentation="Number of messages sent",
    labelnames=["gender", "message_type"],
)

295 

296 

# Response time to host requests, labelled by host gender, surfer gender and response type.
# Bucket bounds are in seconds. The week-based bounds use 604_800 s (7 * 86_400): the previous value of
# 602_000 s was 2_800 s short of a week, so the "1w".."4w" buckets did not sit on week boundaries.
# Note that changing a bound changes the corresponding `le` label value of the exported series.
host_request_first_response_histogram = Histogram(
    "couchers_host_request_first_response_seconds",
    "Response time to host requests",
    labelnames=["host_gender", "surfer_gender", "response_type"],
    buckets=(
        1 * 60,  # 1m
        2 * 60,  # 2m
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        5 * 86_400,  # 5d (value unchanged; the comment used to say 4d)
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        _INF,
    ),
)

# Age of the sending account at the moment a host request is created, labelled by surfer and host gender.
# Bucket bounds are in seconds. The week-based bounds use 604_800 s (7 * 86_400): the previous value of
# 602_000 s was 2_800 s short of a week, an error that grew to more than 3 days at the 104w bound.
# Note that changing a bound changes the corresponding `le` label value of the exported series.
account_age_on_host_request_create_histogram = Histogram(
    "couchers_account_age_on_host_request_create_histogram_seconds",
    "Age of account sending a host request",
    labelnames=["surfer_gender", "host_gender"],
    buckets=(
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        4 * 86_400,  # 4d
        5 * 86_400,  # 5d
        6 * 86_400,  # 6d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        5 * 604_800,  # 5w
        10 * 604_800,  # 10w
        25 * 604_800,  # 25w
        52 * 604_800,  # 52w
        104 * 604_800,  # 104w
        _INF,
    ),
)

357 

358 

def create_prometheus_server(port):
    """
    Starts an HTTP server on the given port that serves the metrics in registry, and returns the server

    The server runs in a daemon thread. This is a custom start method to fix the problem described in
    https://github.com/prometheus/client_python/issues/155
    """

    def metrics_app(environ, start_response):
        # refresh every query-backed gauge so that this response carries current values
        for hacky_gauge, compute_value in _set_hacky_gauges_funcs:
            hacky_gauge.set(compute_value())

        payload = generate_latest(registry)
        headers = [
            ("Content-type", CONTENT_TYPE_LATEST),
            ("Content-Length", str(len(payload))),
        ]
        start_response("200 OK", headers)
        return [payload]

    server = exposition.make_server(
        "", port, metrics_app, exposition.ThreadingWSGIServer, handler_class=exposition._SilentHandler
    )
    # daemon thread, so the metrics server never keeps the process alive on its own
    threading.Thread(target=server.serve_forever, daemon=True).start()
    return server