Coverage for src/couchers/metrics.py: 100%

1import threading

2from datetime import timedelta

4from opentelemetry import trace

5from prometheus_client import (

6 CONTENT_TYPE_LATEST,

7 CollectorRegistry,

8 Counter,

9 Gauge,

10 Histogram,

11 exposition,

12 generate_latest,

13 multiprocess,

14)

15from prometheus_client.registry import CollectorRegistry

16from sqlalchemy.sql import distinct, func

18from couchers.db import session_scope

19from couchers.models import BackgroundJob, EventOccurrenceAttendee, HostingStatus, HostRequest, Message, Reference, User

20from couchers.sql import couchers_select as select

# NOTE: this rebinds the name of the imported `trace` module to a tracer object; below this line `trace` is the tracer
trace = trace.get_tracer(__name__)

# registry that gets rendered when metrics are served; a MultiProcessCollector is attached to it
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry=registry)

# used as the explicit last bound in the custom histogram bucket tuples
_INF = float("inf")

# durations of background jobs; observations are recorded via observe_in_jobs_duration_histogram
jobs_duration_histogram = Histogram(
    name="couchers_background_jobs_seconds",
    documentation="Durations of background jobs",
    labelnames=["job", "status", "attempt", "exception"],
)

def observe_in_jobs_duration_histogram(job_type, job_state, try_count, exception_name, duration_s):
    """
    Records one observation of duration_s in jobs_duration_histogram

    The label values are, in order: job_type, job_state, try_count (converted to a string) and exception_name
    """
    labelled = jobs_duration_histogram.labels(job_type, job_state, str(try_count), exception_name)
    labelled.observe(duration_s)

# time a background job spent queued; has no labels and uses custom bucket bounds instead of the defaults
jobs_queued_histogram = Histogram(
    name="couchers_background_jobs_queued_seconds",
    documentation="Time background job spent queued before being picked up",
    buckets=(
        0.01,
        0.05,
        0.1,
        0.5,
        1.0,
        2.5,
        5.0,
        10,
        20,
        30,
        40,
        50,
        60,
        90,
        120,
        300,
        600,
        1800,
        3600,
        _INF,
    ),
)

# durations of gRPC calls; observations are recorded via observe_in_servicer_duration_histogram
servicer_duration_histogram = Histogram(
    name="couchers_servicer_duration_seconds",
    documentation="Durations of processing gRPC calls",
    labelnames=["method", "logged_in", "code", "exception"],
)

def observe_in_servicer_duration_histogram(method, user_id, status_code, exception_type, duration_s):
    """
    Records one observation of duration_s in servicer_duration_histogram

    The "logged_in" label is whether user_id is not None; the remaining label values are method, status_code and
    exception_type, passed through as given
    """
    logged_in = user_id is not None
    servicer_duration_histogram.labels(method, logged_in, status_code, exception_type).observe(duration_s)

# list of (gauge, function) pairs, where calling the function gives the value the gauge should be set to
# the python prometheus client does not support Gauge.set_function here, so instead we hack around it: every gauge in
# this list is set explicitly just before collection (see create_prometheus_server)
_set_hacky_gauges_funcs = []

def _make_gauge_from_query(name, description, statement):
    """
    Creates a gauge whose value comes from running a sqlalchemy statement

    statement should be a sqlalchemy SELECT statement that returns a single number

    The gauge is not given a value here: it is appended to _set_hacky_gauges_funcs together with a function that runs
    the statement, so that its value can be set later. Returns the new gauge.
    """

    def run_query():
        # one span per metric, wrapping a session_scope() in which the statement is executed
        with trace.start_as_current_span(f"metric.{name}"), session_scope() as session:
            result = session.execute(statement)
            return result.scalar_one()

    new_gauge = Gauge(name, description, multiprocess_mode="mostrecent")
    _set_hacky_gauges_funcs.append((new_gauge, run_query))
    return new_gauge

# one gauge per time window, each counting visible users whose last_active falls within that window before now
active_users_gauges = [
    _make_gauge_from_query(
        name=f"couchers_active_users_{suffix}",
        description=f"Number of active users in the last {window_text}",
        statement=(
            select(func.count())
            .select_from(User)
            .where(User.is_visible)
            .where(User.last_active > func.now() - window)
        ),
    )
    for suffix, window_text, window in [
        ("5m", "5 min", timedelta(minutes=5)),
        ("24h", "24 hours", timedelta(hours=24)),
        ("1month", "1 month", timedelta(days=31)),
        ("3month", "3 months", timedelta(days=92)),
        ("6month", "6 months", timedelta(days=183)),
        ("12month", "12 months", timedelta(days=365)),
    ]
]

# gauges counting visible users, in total and broken down by a single attribute of the user

users_gauge = _make_gauge_from_query(
    name="couchers_users",
    description="Total number of users",
    statement=select(func.count()).select_from(User).where(User.is_visible),
)

# by gender

man_gauge = _make_gauge_from_query(
    name="couchers_users_man",
    description="Total number of users with gender 'Man'",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.gender == "Man"),
)

woman_gauge = _make_gauge_from_query(
    name="couchers_users_woman",
    description="Total number of users with gender 'Woman'",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.gender == "Woman"),
)

nonbinary_gauge = _make_gauge_from_query(
    name="couchers_users_nonbinary",
    description="Total number of users with gender 'Non-binary'",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.gender == "Non-binary"),
)

# by hosting status

can_host_gauge = _make_gauge_from_query(
    name="couchers_users_can_host",
    description="Total number of users with hosting status 'can_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.can_host)
    ),
)

cant_host_gauge = _make_gauge_from_query(
    name="couchers_users_cant_host",
    description="Total number of users with hosting status 'cant_host'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.cant_host)
    ),
)

maybe_gauge = _make_gauge_from_query(
    name="couchers_users_maybe",
    description="Total number of users with hosting status 'maybe'",
    statement=(
        select(func.count())
        .select_from(User)
        .where(User.is_visible)
        .where(User.hosting_status == HostingStatus.maybe)
    ),
)

# by profile completion

completed_profile_gauge = _make_gauge_from_query(
    name="couchers_users_completed_profile",
    description="Total number of users with a completed profile",
    statement=select(func.count()).select_from(User).where(User.is_visible).where(User.has_completed_profile),
)

142

# gauges counting distinct visible users that appear in some other table (as author, surfer, recipient or attendee)

sent_message_gauge = _make_gauge_from_query(
    name="couchers_users_sent_message",
    description="Total number of users who have sent a message",
    statement=(
        select(func.count(distinct(Message.author_id)))
        .join(User, User.id == Message.author_id)
        .where(User.is_visible)
    ),
)

sent_request_gauge = _make_gauge_from_query(
    name="couchers_users_sent_request",
    description="Total number of users who have sent a host request",
    statement=(
        select(func.count(distinct(HostRequest.surfer_user_id)))
        .join(User, User.id == HostRequest.surfer_user_id)
        .where(User.is_visible)
    ),
)

has_reference_gauge = _make_gauge_from_query(
    name="couchers_users_has_reference",
    description="Total number of users who have a reference",
    statement=(
        select(func.count(distinct(Reference.to_user_id)))
        .join(User, User.id == Reference.to_user_id)
        .where(User.is_visible)
    ),
)

rsvpd_to_event_gauge = _make_gauge_from_query(
    name="couchers_users_rsvpd_to_event",
    description="Total number of users who have RSVPd to an event",
    statement=(
        select(func.count(distinct(EventOccurrenceAttendee.user_id)))
        .join(User, User.id == EventOccurrenceAttendee.user_id)
        .where(User.is_visible)
    ),
)

178

# background job metrics: one gauge computed from a query, plus counters for the background workers

background_jobs_ready_to_execute_gauge = _make_gauge_from_query(
    name="couchers_background_jobs_ready_to_execute",
    description="Total number of background jobs ready to execute",
    statement=select(func.count()).select_from(BackgroundJob).where(BackgroundJob.ready_for_retry),
)

background_jobs_serialization_errors_counter = Counter(
    name="couchers_background_jobs_serialization_errors_total",
    documentation="Number of times a bg worker has a serialization error",
)

background_jobs_no_jobs_counter = Counter(
    name="couchers_background_jobs_no_jobs_total",
    documentation="Number of times a bg worker tries to grab a job but there is none",
)

background_jobs_got_job_counter = Counter(
    name="couchers_background_jobs_got_job_total",
    documentation="Number of times a bg worker grabbed a job",
)

199

200

# signup metrics; the completion counter and the time histogram are labelled by gender, the initiation counter is not

signup_initiations_counter = Counter(
    name="couchers_signup_initiations_total",
    documentation="Number of initiated signups",
)

signup_completions_counter = Counter(
    name="couchers_signup_completions_total",
    documentation="Number of completed signups",
    labelnames=["gender"],
)

signup_time_histogram = Histogram(
    name="couchers_signup_time_seconds",
    documentation="Time taken for a user to sign up",
    labelnames=["gender"],
    buckets=(
        30,
        60,
        90,
        120,
        180,
        240,
        300,
        360,
        420,
        480,
        540,
        600,
        900,
        1200,
        1800,
        3600,
        7200,
        _INF,
    ),
)

216

# login, password reset and account lifecycle counters

logins_counter = Counter(
    name="couchers_logins_total",
    documentation="Number of logins",
    labelnames=["gender"],
)

# the password reset counters have no labels

password_reset_initiations_counter = Counter(
    name="couchers_password_reset_initiations_total",
    documentation="Number of password reset initiations",
)

password_reset_completions_counter = Counter(
    name="couchers_password_reset_completions_total",
    documentation="Number of password reset completions",
)

# account deletion and recovery, all labelled by gender

account_deletion_initiations_counter = Counter(
    name="couchers_account_deletion_initiations_total",
    documentation="Number of account deletion initiations",
    labelnames=["gender"],
)

account_deletion_completions_counter = Counter(
    name="couchers_account_deletion_completions_total",
    documentation="Number of account deletion completions",
    labelnames=["gender"],
)

account_recoveries_counter = Counter(
    name="couchers_account_recoveries_total",
    documentation="Number of account recoveries",
    labelnames=["gender"],
)

247

# strong verification counters; note that the completions counter has no labels, unlike the other two

strong_verification_initiations_counter = Counter(
    name="couchers_strong_verification_initiations_total",
    documentation="Number of strong verification initiations",
    labelnames=["gender"],
)

strong_verification_completions_counter = Counter(
    name="couchers_strong_verification_completions_total",
    documentation="Number of strong verification completions",
)

strong_verification_data_deletions_counter = Counter(
    name="couchers_strong_verification_data_deletions_total",
    documentation="Number of strong verification data deletions",
    labelnames=["gender"],
)

262

# host request and message counters

host_requests_sent_counter = Counter(
    name="couchers_host_requests_total",
    documentation="Number of host requests sent",
    labelnames=["from_gender", "to_gender"],
)

host_request_responses_counter = Counter(
    name="couchers_host_requests_responses_total",
    documentation="Number of responses to host requests",
    labelnames=["responder_gender", "other_gender", "response_type"],
)

sent_messages_counter = Counter(
    name="couchers_sent_messages_total",
    documentation="Number of messages sent",
    labelnames=["gender", "message_type"],
)

279

280

# push notification and email counters, none of which have labels

push_notification_counter = Counter(
    name="couchers_push_notification_total",
    documentation="Number of push notifications sent",
)

push_notification_disabled_counter = Counter(
    name="couchers_push_notification_disabled_total",
    documentation="Number of push notifications that were disabled due to failure to send",
)

emails_counter = Counter(
    name="couchers_emails_total",
    documentation="Number of emails sent",
)

293

294

# recaptcha metrics, both labelled by action

recaptchas_assessed_counter = Counter(
    name="couchers_recaptchas_assessed_total",
    documentation="Number of times a recaptcha assessment is created",
    labelnames=["action"],
)

recaptcha_score_histogram = Histogram(
    name="couchers_recaptcha_score",
    documentation="Score of recaptcha assessments",
    labelnames=["action"],
    # bucket bounds 0.0, 0.05, 0.1, ..., 1.0
    buckets=tuple(step / 20 for step in range(21)),
)

307

# response time to host requests, labelled by the genders of host and surfer and by the type of response
# bucket bounds are in seconds; a week is 7 * 86_400 = 604_800 seconds
host_request_first_response_histogram = Histogram(
    "couchers_host_request_first_response_seconds",
    "Response time to host requests",
    labelnames=["host_gender", "surfer_gender", "response_type"],
    buckets=(
        1 * 60,  # 1m
        2 * 60,  # 2m
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        # NOTE(review): this bound was previously annotated "4d" although the value is 5 days; the value is kept
        # unchanged here, confirm which of the two was intended
        5 * 86_400,  # 5d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        _INF,
    ),
)

# age of the account that sends a host request, labelled by the genders of surfer and host
# bucket bounds are in seconds; a week is 7 * 86_400 = 604_800 seconds
account_age_on_host_request_create_histogram = Histogram(
    "couchers_account_age_on_host_request_create_histogram_seconds",
    "Age of account sending a host request",
    labelnames=["surfer_gender", "host_gender"],
    buckets=(
        5 * 60,  # 5m
        10 * 60,  # 10m
        15 * 60,  # 15m
        30 * 60,  # 30m
        45 * 60,  # 45m
        3_600,  # 1h
        2 * 3_600,  # 2h
        3 * 3_600,  # 3h
        6 * 3_600,  # 6h
        12 * 3_600,  # 12h
        86_400,  # 24h
        2 * 86_400,  # 2d
        3 * 86_400,  # 3d
        4 * 86_400,  # 4d
        5 * 86_400,  # 5d
        6 * 86_400,  # 6d
        604_800,  # 1w
        2 * 604_800,  # 2w
        3 * 604_800,  # 3w
        4 * 604_800,  # 4w
        5 * 604_800,  # 5w
        10 * 604_800,  # 10w
        25 * 604_800,  # 25w
        52 * 604_800,  # 52w
        104 * 604_800,  # 104w
        _INF,
    ),
)

368

369

def create_prometheus_server(port):
    """
    Custom start method to fix the problem described in https://github.com/prometheus/client_python/issues/155

    Creates a WSGI server on the given port, starts serving it from a daemon thread, and returns the server object.
    On every request, each gauge in _set_hacky_gauges_funcs is set from its function and then the metrics in
    registry are rendered and returned.
    """

    def metrics_app(environ, start_response):
        # set hacky gauges
        for hacky_gauge, compute_value in _set_hacky_gauges_funcs:
            hacky_gauge.set(compute_value())

        payload = generate_latest(registry)
        headers = [("Content-type", CONTENT_TYPE_LATEST), ("Content-Length", str(len(payload)))]
        start_response("200 OK", headers)
        return [payload]

    httpd = exposition.make_server(
        "", port, metrics_app, exposition.ThreadingWSGIServer, handler_class=exposition._SilentHandler
    )
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    return httpd