Coverage for an_website / quotes / utils.py: 53.286%

426 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-19 18:33 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13 

14"""A page with wrong quotes.""" 

15 

16import abc 

17import asyncio 

18import contextlib 

19import logging 

20import multiprocessing.synchronize 

21import random 

22import sys 

23import time 

24from collections.abc import ( 

25 Callable, 

26 Iterable, 

27 Mapping, 

28 MutableMapping, 

29 Sequence, 

30) 

31from dataclasses import dataclass 

32from datetime import date 

33from typing import Any, Final, Literal, cast 

34from urllib.parse import urlencode 

35 

36import dill # type: ignore[import-untyped] # nosec: B403 

37import elasticapm 

38import orjson as json 

39import typed_stream 

40from redis.asyncio import Redis 

41from tornado.httpclient import AsyncHTTPClient 

42from tornado.web import Application, HTTPError 

43from UltraDict import UltraDict # type: ignore[import-untyped] 

44 

45from .. import ( 

46 CA_BUNDLE_PATH, 

47 DIR as ROOT_DIR, 

48 EVENT_REDIS, 

49 EVENT_SHUTDOWN, 

50 NAME, 

51 ORJSON_OPTIONS, 

52) 

53from ..utils.request_handler import HTMLRequestHandler 

54from ..utils.utils import ModuleInfo, Permission, ratelimit 

55 

56DIR: Final = ROOT_DIR / "quotes" 

57 

58LOGGER: Final = logging.getLogger(__name__) 

59 

60# TODO: make this configurable and move it into app settings 

61API_URL: str = "https://zitate.prapsschnalinen.de/api" 

62 

63WRONGQUOTE_DELETED: Final[int] = -2 

64WRONGQUOTE_UNKNOWN: Final[int] = -1 

65 

66 

67# pylint: disable-next=too-few-public-methods 

68class UltraDictType[K, V](MutableMapping[K, V], abc.ABC): 

69 """The type of the shared dictionaries.""" 

70 

71 lock: multiprocessing.synchronize.RLock 

72 

73 

# Shared caches, all serialized with dill and created with the same
# buffer size (1024**2, presumably bytes -- confirm against UltraDict docs).

# quote id -> Quote
QUOTES_CACHE: Final[UltraDictType[int, Quote]] = UltraDict(
    buffer_size=1024**2,
    serializer=dill,
)
# author id -> Author
AUTHORS_CACHE: Final[UltraDictType[int, Author]] = UltraDict(
    buffer_size=1024**2,
    serializer=dill,
)
# (quote id, author id) -> WrongQuote
WRONG_QUOTES_CACHE: Final[
    UltraDictType[tuple[int, int], WrongQuote]
] = UltraDict(buffer_size=1024**2, serializer=dill)

83 

84 

85@dataclass(init=False, slots=True) 

86class QuotesObjBase(abc.ABC): 

87 """An object with an id.""" 

88 

89 id: int 

90 

91 @classmethod 

92 @abc.abstractmethod 

93 def fetch_all_endpoint(cls) -> Literal["quotes", "authors", "wrongquotes"]: 

94 """Endpoint to fetch all of this type.""" 

95 raise NotImplementedError 

96 

97 @abc.abstractmethod 

98 async def fetch_new_data(self) -> QuotesObjBase | None: 

99 """Fetch new data from the API.""" 

100 raise NotImplementedError 

101 

102 # pylint: disable=unused-argument 

103 def get_id_as_str(self, minify: bool = False) -> str: 

104 """Get the id of the object as a string.""" 

105 return str(self.id) 

106 

107 @abc.abstractmethod 

108 def get_path(self) -> str: 

109 """Return the path to the Object.""" 

110 raise NotImplementedError 

111 

112 

@dataclass(slots=True)
class Author(QuotesObjBase):
    """The author object with a name and optional additional info."""

    name: str
    # tuple(url_to_info, info_str, creation_date)
    info: None | tuple[str, None | str, date]

    def __str__(self) -> str:
        """Return the name of the author."""
        return self.name

    @classmethod
    def fetch_all_endpoint(cls) -> Literal["authors"]:
        """Endpoint to fetch all authors."""
        return "authors"

    async def fetch_new_data(self) -> Author | None:
        """
        Fetch new data from the API.

        Returns the re-parsed author, or ``None`` if the API answered with
        404; in that case the author is also removed from the cache.
        """
        data = await make_api_request(
            f"authors/{self.id}", entity_should_exist=True
        )
        if data is None:
            # The entry may already be gone from the shared cache; removing
            # it twice must not fail.
            with contextlib.suppress(KeyError):
                del AUTHORS_CACHE[self.id]
            return None
        return parse_author(data)

    def get_path(self) -> str:
        """Return the path to the author info."""
        return f"/zitate/info/a/{self.id}"

    def to_json(self) -> dict[str, Any]:
        """Get the author as JSON; ``info`` is ``None`` when unset."""
        info_json: dict[str, Any] | None = None
        if self.info:
            info_json = {
                "source": self.info[0],
                "text": self.info[1],
                "date": self.info[2].isoformat(),
            }
        return {
            "id": self.id,
            "name": str(self),
            "path": self.get_path(),
            "info": info_json,
        }

160 

161 

@dataclass(slots=True)
class Quote(QuotesObjBase):
    """The quote object with a quote text and an author."""

    quote: str
    author_id: int

    def __str__(self) -> str:
        """Return the content of the quote."""
        return self.quote.strip()

    @property
    def author(self) -> Author:
        """
        Get the corresponding author object from the cache.

        Raises ``HTTPError(404)`` if the author is not cached.
        """
        try:
            return AUTHORS_CACHE[self.author_id]
        except KeyError as err:
            LOGGER.error("Author %d was not in cache", self.author_id)
            raise HTTPError(404) from err

    @classmethod
    def fetch_all_endpoint(cls) -> Literal["quotes"]:
        """Endpoint to fetch all quotes."""
        return "quotes"

    async def fetch_new_data(self) -> Quote | None:
        """
        Fetch new data from the API.

        Returns the re-parsed quote, or ``None`` if the API answered with
        404; in that case the quote is also removed from the cache.
        """
        data = await make_api_request(
            f"quotes/{self.id}", entity_should_exist=True
        )
        if data is None:
            # The entry may already be gone from the shared cache; removing
            # it twice must not fail.
            with contextlib.suppress(KeyError):
                del QUOTES_CACHE[self.id]
            return None
        return parse_quote(data, self)

    def get_path(self) -> str:
        """Return the path to the quote info."""
        return f"/zitate/info/z/{self.id}"

    def to_json(self) -> dict[str, Any]:
        """Get the quote as JSON, including its author."""
        return {
            "id": self.id,
            "quote": str(self),
            "author": self.author.to_json(),
            "path": self.get_path(),
        }

209 

210 

@dataclass(slots=True)
class WrongQuote(QuotesObjBase):
    """The wrong quote object with a quote, an author and a rating."""

    quote_id: int
    author_id: int
    rating: int

    def __str__(self) -> str:
        r"""
        Return the wrong quote.

        like: '»quote« - author'.
        """
        return f"»{self.quote}« - {self.author}"

    @property
    def author(self) -> Author:
        """
        Get the corresponding author object from the cache.

        Raises ``HTTPError(404)`` if the author is not cached.
        """
        try:
            return AUTHORS_CACHE[self.author_id]
        except KeyError as err:
            LOGGER.error("Author %d was not in cache", self.author_id)
            raise HTTPError(404) from err

    @classmethod
    def fetch_all_endpoint(cls) -> Literal["wrongquotes"]:
        """Endpoint to fetch all wrong quotes."""
        return "wrongquotes"

    async def fetch_new_data(self) -> WrongQuote:
        """
        Fetch new data from the API.

        If the API has no data for this wrong quote, the quote and author
        are refreshed: when both still exist the id becomes
        ``WRONGQUOTE_UNKNOWN``, otherwise the id stays
        ``WRONGQUOTE_DELETED`` and the cache entry is removed.
        """
        if self.id == WRONGQUOTE_UNKNOWN:
            # no id known, so look the combination up without creating it
            api_data = await make_api_request(
                "wrongquotes",
                {
                    "quote": str(self.quote_id),
                    "simulate": "true",
                    "author": str(self.author_id),
                },
                entity_should_exist=True,
            )
            if api_data:
                api_data = api_data[0]
        # pylint: disable-next=confusing-consecutive-elif
        elif self.id == WRONGQUOTE_DELETED:
            api_data = None
        else:
            api_data = await make_api_request(
                f"wrongquotes/{self.id}", entity_should_exist=True
            )

        if api_data:
            return parse_wrong_quote(api_data, self)

        self.id = WRONGQUOTE_DELETED
        author = await self.author.fetch_new_data()
        quote = await self.quote.fetch_new_data()
        if author and quote:
            self.id = WRONGQUOTE_UNKNOWN
        else:
            # This object is not necessarily stored in the cache (e.g. a
            # placeholder with an unknown id), so a missing key is fine.
            with contextlib.suppress(KeyError):
                del WRONG_QUOTES_CACHE[(self.quote_id, self.author_id)]
        return self

    def get_id(self) -> tuple[int, int]:
        """
        Get the id of the quote and the author in a tuple.

        :return tuple(quote_id, author_id)
        """
        return self.quote_id, self.author_id

    def get_id_as_str(self, minify: bool = False) -> str:
        """
        Get the id of the wrong quote as a string.

        Format: quote_id-author_id, or just the wrong quote id if
        ``minify`` is true and the id is not ``WRONGQUOTE_UNKNOWN``.
        """
        if minify and self.id != WRONGQUOTE_UNKNOWN:
            return str(self.id)
        return f"{self.quote_id}-{self.author_id}"

    def get_path(self) -> str:
        """Return the path to the wrong quote."""
        return f"/zitate/{self.get_id_as_str()}"

    @property
    def quote(self) -> Quote:
        """
        Get the corresponding quote object from the cache.

        Raises ``HTTPError(404)`` if the quote is not cached.
        """
        try:
            return QUOTES_CACHE[self.quote_id]
        except KeyError as err:
            LOGGER.error("Quote %d was not in cache", self.quote_id)
            raise HTTPError(404) from err

    def to_json(self) -> dict[str, Any]:
        """Get the wrong quote as JSON."""
        return {
            "id": self.get_id_as_str(),
            "quote": self.quote.to_json(),
            "author": self.author.to_json(),
            "rating": self.rating,
            "path": self.get_path(),
        }

    async def vote(
        # pylint: disable=unused-argument
        self,
        vote: Literal[-1, 1],
        lazy: bool = False,
    ) -> WrongQuote | None:
        """
        Vote for the wrong quote.

        ``lazy`` is accepted but currently ignored; the vote is always sent
        to the API right away.  If the API answers with 404 the result of
        :meth:`fetch_new_data` is returned instead.

        Raises ``ValueError`` if the id is ``WRONGQUOTE_UNKNOWN``.
        """
        if self.id == WRONGQUOTE_UNKNOWN:
            raise ValueError("Can't vote for a not existing quote.")
        data = await make_api_request(
            f"wrongquotes/{self.id}",
            method="POST",
            body={"vote": str(vote)},
            entity_should_exist=True,
        )
        if data is None:
            return await self.fetch_new_data()
        return parse_wrong_quote(data, self)

344 

345 

def get_wrong_quotes(
    filter_fun: None | Callable[[WrongQuote], bool] = None,
    *,
    sort: bool = False,  # sorted by rating
    filter_real_quotes: bool = True,
    shuffle: bool = False,
) -> Sequence[WrongQuote]:
    """
    Return the cached wrong quotes as a new list.

    ``filter_fun`` keeps only the wrong quotes it accepts,
    ``filter_real_quotes`` drops combinations where the quote's own author
    is used, ``sort`` orders by rating (highest first) and ``shuffle``
    randomizes the order.  Raises ``ValueError`` if both ``sort`` and
    ``shuffle`` are set.
    """
    if shuffle and sort:
        raise ValueError("Sort and shuffle can't be both true.")

    selected = [
        wrong_quote
        for wrong_quote in WRONG_QUOTES_CACHE.values()
        if (not filter_fun or filter_fun(wrong_quote))
        and (
            not filter_real_quotes
            or wrong_quote.quote.author_id != wrong_quote.author_id
        )
    ]

    if shuffle:
        random.shuffle(selected)
        return selected
    if sort:
        selected.sort(key=lambda wrong_quote: wrong_quote.rating, reverse=True)
    return selected

369 

370 

def get_quotes(
    filter_fun: None | Callable[[Quote], bool] = None,
    shuffle: bool = False,
) -> list[Quote]:
    """
    Get cached quotes as a new list.

    If ``filter_fun`` is given only the quotes it accepts are returned;
    with ``shuffle`` the result is put into random order.
    """
    quotes: list[Quote] = list(QUOTES_CACHE.values())
    if filter_fun:
        # a single pass instead of deleting by index, which is quadratic
        quotes = [quote for quote in quotes if filter_fun(quote)]
    if shuffle:
        random.shuffle(quotes)
    return quotes

384 

385 

def get_authors(
    filter_fun: None | Callable[[Author], bool] = None,
    shuffle: bool = False,
) -> list[Author]:
    """
    Get cached authors as a new list.

    If ``filter_fun`` is given only the authors it accepts are returned;
    with ``shuffle`` the result is put into random order.
    """
    authors: list[Author] = list(AUTHORS_CACHE.values())
    if filter_fun:
        # a single pass instead of deleting by index, which is quadratic
        authors = [author for author in authors if filter_fun(author)]
    if shuffle:
        random.shuffle(authors)
    return authors

399 

400 

# pylint: disable-next=too-many-arguments
async def make_api_request(
    endpoint: str,
    args: Mapping[str, str] | None = None,
    *,
    # pylint: disable-next=unused-argument
    entity_should_exist: bool,
    method: Literal["GET", "POST"] = "GET",
    body: None | Mapping[str, str | int] = None,
    request_timeout: float | None = None,
) -> Any | None:  # TODO: list[dict[str, Any]] | dict[str, Any] | None
    """
    Make an API request and return the decoded JSON response.

    ``args`` is sent as the query string and ``body`` form-encoded as the
    request body.  ``entity_should_exist`` is required but not used by this
    function.

    Returns ``None`` if the API answers with 404.  Any other non-200
    response is logged and raised as ``HTTPError(503)``.
    """
    query = f"?{urlencode(args)}" if args else ""
    url = f"{API_URL}/{endpoint}{query}"
    # Only a missing body stays None; an empty mapping is encoded to an
    # empty string instead of being handed to the HTTP client as a mapping.
    body_str = None if body is None else urlencode(body)
    response = await AsyncHTTPClient().fetch(
        url,
        method=method,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        body=body_str,
        raise_error=False,
        ca_certs=CA_BUNDLE_PATH,
        request_timeout=request_timeout,
    )
    if response.code == 200:
        return json.loads(response.body)
    if response.code == 404:
        return None
    LOGGER.log(
        logging.ERROR if response.code >= 500 else logging.WARNING,
        "%s request to %r with body=%r failed with code=%d and reason=%r",
        method,
        url,
        body_str,
        response.code,
        response.reason,
    )
    raise HTTPError(
        503,
        reason=f"{url} returned: {response.code} {response.reason}",
    )

442 

443 

def fix_author_name(name: str) -> str:
    """
    Fix common mistakes in authors.

    Surrounding whitespace is removed, as is one pair of parentheses that
    wraps the whole name.  Parentheses are kept when the leading "(" is
    closed before the end of the name, e.g. "(A) und (B)".
    """
    name = name.strip()
    if len(name) > 2 and name.startswith("(") and name.endswith(")"):
        depth = 0
        for char in name[:-1]:
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
            if depth == 0:
                # the leading "(" does not belong to the trailing ")"
                return name
        # remove () from author name, that shouldn't be there
        name = name[1:-1]
    return name.strip()

450 

451 

def parse_author(json_data: Mapping[str, Any]) -> Author:
    """
    Build or update the cached author described by ``json_data``.

    Reads the keys ``"id"`` and ``"author"``.  If a cached author gets a
    different name its info is reset.  The author is written back to the
    cache and returned.
    """
    author_id = int(json_data["id"])
    fixed_name = fix_author_name(json_data["author"])

    with AUTHORS_CACHE.lock:
        cached = AUTHORS_CACHE.get(author_id)
        if cached is None:
            # pylint: disable-next=too-many-function-args
            cached = Author(author_id, fixed_name, None)
        elif cached.name != fixed_name:
            cached.name = fixed_name
            cached.info = None  # reset info

        AUTHORS_CACHE[cached.id] = cached

    return cached

469 

470 

def fix_quote_str(quote_str: str) -> str:
    """
    Fix common mistakes in quotes.

    Surrounding whitespace is removed, as is one pair of quotation marks
    that wraps the whole quote.
    """
    # strip first, so whitespace around the marks doesn't hide them
    quote_str = quote_str.strip()
    if (
        len(quote_str) > 2
        and quote_str.startswith(('"', "„", "“"))
        and quote_str.endswith(('"', "“", "”"))
    ):
        # remove quotation marks from quote, that shouldn't be there
        quote_str = quote_str[1:-1]

    return quote_str.strip()

482 

483 

def parse_quote(
    json_data: Mapping[str, Any], quote: None | Quote = None
) -> Quote:
    """
    Build or update the cached quote described by ``json_data``.

    Reads the keys ``"id"``, ``"author"`` and ``"quote"``; the author is
    parsed (and cached) as well.  If ``quote`` is not given the cached
    quote with that id is updated, or a new one is created.  The quote is
    written back to the cache and returned.
    """
    quote_id = int(json_data["id"])
    author = parse_author(json_data["author"])  # update author
    text = fix_quote_str(json_data["quote"])

    with QUOTES_CACHE.lock:
        # no quote supplied, try getting it from cache
        target = QUOTES_CACHE.get(quote_id) if quote is None else quote
        if target is not None:  # quote was already saved
            target.quote = text
            target.author_id = author.id
        else:  # new quote
            # pylint: disable=too-many-function-args
            target = Quote(quote_id, text, author.id)

        QUOTES_CACHE[target.id] = target

    return target

505 

506 

def parse_wrong_quote(
    json_data: Mapping[str, Any], wrong_quote: None | WrongQuote = None
) -> WrongQuote:
    """
    Parse a wrong quote and update the cache.

    Reads the keys ``"quote"``, ``"author"``, ``"rating"`` and the
    optional ``"id"``; a missing or falsy id becomes
    ``WRONGQUOTE_UNKNOWN``.  A supplied ``wrong_quote`` receives the new id
    and rating, but an existing cache entry takes precedence as the
    returned object.
    """
    quote = parse_quote(json_data["quote"])
    author = parse_author(json_data["author"])

    cache_key = (quote.id, author.id)
    rating = json_data["rating"]
    new_id = int(json_data.get("id") or WRONGQUOTE_UNKNOWN)

    if wrong_quote:
        wrong_quote.id = new_id
        wrong_quote.rating = rating

    with WRONG_QUOTES_CACHE.lock:
        result = WRONG_QUOTES_CACHE.get(cache_key, wrong_quote)
        if result is not None:
            result.id = new_id
            result.rating = rating
        else:
            result = WrongQuote(  # pylint: disable=unexpected-keyword-arg
                id=new_id,
                quote_id=quote.id,
                author_id=author.id,
                rating=rating,
            )
        WRONG_QUOTES_CACHE[cache_key] = result

    return result

537 

538 

539async def parse_list_of_quote_data[Q: QuotesObjBase]( # noqa: D103 

540 json_list: str | Iterable[Mapping[str, Any]], 

541 parse_fun: Callable[[Mapping[str, Any]], Q], 

542) -> tuple[Q, ...]: 

543 """Parse a list of quote data.""" 

544 if not json_list: 

545 return () 

546 if isinstance(json_list, str): 

547 json_list = cast(list[dict[str, Any]], json.loads(json_list)) 

548 return_list = [] 

549 for json_data in json_list: 

550 _ = parse_fun(json_data) 

551 await asyncio.sleep(0) 

552 return_list.append(_) 

553 return tuple(return_list) 

554 

555 

async def update_cache_periodically(
    app: Application, worker: int | None
) -> None:
    """
    Keep the quotes cache up to date, refreshing it every hour.

    Only runs the update loop when ``worker`` is falsy.  If Redis becomes
    available within five seconds the caches are first filled from the
    data stored there, and the first full update is delayed until the
    stored data is one hour old.
    """
    # pylint: disable=too-complex, too-many-branches
    module_infos = cast(
        Iterable[ModuleInfo], app.settings.get("MODULE_INFOS", ())
    )
    if "/troet" in typed_stream.Stream(module_infos).map(lambda m: m.path):
        app.settings["SHOW_SHARING_ON_MASTODON"] = True
    if worker:
        return
    with contextlib.suppress(asyncio.TimeoutError):
        await asyncio.wait_for(EVENT_REDIS.wait(), 5)
    redis: Redis[str] = cast("Redis[str]", app.settings.get("REDIS"))
    prefix: str = app.settings.get("REDIS_PREFIX", NAME).removesuffix("-dev")
    apm: None | elasticapm.Client
    if EVENT_REDIS.is_set():  # pylint: disable=too-many-nested-blocks
        # fill the caches from the data saved in Redis
        await parse_list_of_quote_data(
            await redis.get(f"{prefix}:cached-quote-data:authors"),  # type: ignore[arg-type]  # noqa: B950
            parse_author,
        )
        await parse_list_of_quote_data(
            await redis.get(f"{prefix}:cached-quote-data:quotes"),  # type: ignore[arg-type]  # noqa: B950
            parse_quote,
        )
        await parse_list_of_quote_data(
            await redis.get(f"{prefix}:cached-quote-data:wrongquotes"),  # type: ignore[arg-type]  # noqa: B950
            parse_wrong_quote,
        )
        if QUOTES_CACHE and AUTHORS_CACHE and WRONG_QUOTES_CACHE:
            last_update = await redis.get(
                f"{prefix}:cached-quote-data:last-update"
            )
            if last_update:
                seconds_since_update = int(time.time()) - int(last_update)
                if 0 <= seconds_since_update < 60 * 60:
                    # wait until the last update is at least one hour old
                    update_cache_in = 60 * 60 - seconds_since_update
                    if not sys.flags.dev_mode and update_cache_in > 60:
                        # if in production mode update wrong quotes just to be sure
                        try:
                            await update_cache(
                                app, update_quotes=False, update_authors=False
                            )
                        except Exception:  # pylint: disable=broad-except
                            LOGGER.exception("Updating quotes cache failed")
                            apm = app.settings.get("ELASTIC_APM", {}).get(
                                "CLIENT"
                            )
                            if apm:
                                apm.capture_exception()
                        else:
                            LOGGER.info("Updated quotes cache successfully")
                    LOGGER.info(
                        "Next update of quotes cache in %d seconds",
                        update_cache_in,
                    )
                    await asyncio.sleep(update_cache_in)

    # update the cache every hour
    failed = 0
    while not EVENT_SHUTDOWN.is_set():  # pylint: disable=while-used
        try:
            await update_cache(app)
        except Exception:  # pylint: disable=broad-except
            LOGGER.exception("Updating quotes cache failed")
            if apm := app.settings.get("ELASTIC_APM", {}).get("CLIENT"):
                apm.capture_exception()
            failed += 1
            # back off longer after every consecutive failure
            await asyncio.sleep(pow(min(failed * 2, 60), 2))  # 4,16,...,60*60
        else:
            LOGGER.info("Updated quotes cache successfully")
            failed = 0
            await asyncio.sleep(60 * 60)

631 

632 

633async def update_cache( # pylint: disable=too-complex,too-many-branches,too-many-locals,too-many-statements # noqa: B950,C901 

634 app: Application, 

635 update_wrong_quotes: bool = True, 

636 update_quotes: bool = True, 

637 update_authors: bool = True, 

638) -> None: 

639 """Fill the cache with all data from the API.""" 

640 LOGGER.info("Updating quotes cache") 

641 redis: Redis[str] = cast("Redis[str]", app.settings.get("REDIS")) 

642 prefix: str = app.settings.get("REDIS_PREFIX", NAME).removesuffix("-dev") 

643 redis_available = EVENT_REDIS.is_set() 

644 exceptions: list[Exception] = [] 

645 

646 if update_wrong_quotes: 

647 try: 

648 await _update_cache(WrongQuote, parse_wrong_quote, redis, prefix) 

649 except Exception as err: # pylint: disable=broad-exception-caught 

650 exceptions.append(err) 

651 

652 deleted_quotes: set[int] = set() 

653 

654 if update_quotes: 

655 try: 

656 quotes = await _update_cache(Quote, parse_quote, redis, prefix) 

657 except Exception as err: # pylint: disable=broad-exception-caught 

658 exceptions.append(err) 

659 else: 

660 with QUOTES_CACHE.lock: 

661 all_quote_ids = {q.id for q in quotes} 

662 max_quote_id = max(all_quote_ids) 

663 old_ids_in_cache = { 

664 _id for _id in QUOTES_CACHE if _id <= max_quote_id 

665 } 

666 deleted_quotes = old_ids_in_cache - all_quote_ids 

667 for _id in deleted_quotes: 

668 del QUOTES_CACHE[_id] 

669 

670 if len(QUOTES_CACHE) < len(quotes): 

671 LOGGER.error("Cache has less elements than just fetched") 

672 

673 deleted_authors: set[int] = set() 

674 

675 if update_authors: 

676 try: 

677 authors = await _update_cache(Author, parse_author, redis, prefix) 

678 except Exception as err: # pylint: disable=broad-exception-caught 

679 exceptions.append(err) 

680 else: 

681 with AUTHORS_CACHE.lock: 

682 all_author_ids = {q.id for q in authors} 

683 max_author_id = max(all_author_ids) 

684 old_ids_in_cache = { 

685 _id for _id in AUTHORS_CACHE if _id <= max_author_id 

686 } 

687 deleted_authors = old_ids_in_cache - all_author_ids 

688 for _id in deleted_authors: 

689 del AUTHORS_CACHE[_id] 

690 

691 if len(AUTHORS_CACHE) < len(authors): 

692 LOGGER.error("Cache has less elements than just fetched") 

693 

694 if deleted_authors or deleted_quotes: 

695 deleted_wrong_quotes: set[tuple[int, int]] = set() 

696 with WRONG_QUOTES_CACHE.lock: 

697 for qid, aid in tuple(WRONG_QUOTES_CACHE): 

698 if qid in deleted_quotes or aid in deleted_authors: 

699 deleted_wrong_quotes.add((qid, aid)) 

700 del WRONG_QUOTES_CACHE[(qid, aid)] 

701 LOGGER.warning( 

702 "Deleted %d wrong quotes: %r", 

703 len(deleted_wrong_quotes), 

704 deleted_wrong_quotes, 

705 ) 

706 

707 if exceptions: 

708 raise ExceptionGroup("Cache could not be updated", exceptions) 

709 

710 if ( 

711 redis_available 

712 and update_wrong_quotes 

713 and update_quotes 

714 and update_authors 

715 ): 

716 await redis.setex( 

717 f"{prefix}:cached-quote-data:last-update", 

718 60 * 60 * 24 * 30, 

719 int(time.time()), 

720 ) 

721 

722 

723async def _update_cache[Q: QuotesObjBase]( 

724 klass: type[Q], 

725 parse: Callable[[Mapping[str, Any]], Q], 

726 redis: Redis[str], 

727 redis_prefix: str, 

728) -> tuple[Q, ...]: 

729 wq_data = await make_api_request( 

730 klass.fetch_all_endpoint(), entity_should_exist=True 

731 ) 

732 if wq_data is None: 

733 LOGGER.error("%s returned 404", klass.fetch_all_endpoint()) 

734 return () 

735 parsed_data = await parse_list_of_quote_data( 

736 wq_data, 

737 parse, 

738 ) 

739 if wq_data and EVENT_REDIS.is_set(): 

740 await redis.setex( 

741 f"{redis_prefix}:cached-quote-data:{klass.fetch_all_endpoint()}", 

742 60 * 60 * 24 * 30, 

743 json.dumps(wq_data, option=ORJSON_OPTIONS), 

744 ) 

745 return parsed_data 

746 

747 

async def get_author_by_id(author_id: int) -> Author | None:
    """
    Return the author with the given id.

    The cache is consulted first; on a miss the API is asked.  Returns
    ``None`` if the API does not know the author either.
    """
    if (cached := AUTHORS_CACHE.get(author_id)) is not None:
        return cached
    data = await make_api_request(
        f"authors/{author_id}", entity_should_exist=False
    )
    return None if data is None else parse_author(data)

759 

760 

async def get_quote_by_id(quote_id: int) -> Quote | None:
    """
    Return the quote with the given id.

    The cache is consulted first; on a miss the API is asked.  Returns
    ``None`` if the API does not know the quote either.
    """
    if (cached := QUOTES_CACHE.get(quote_id)) is not None:
        return cached
    data = await make_api_request(
        f"quotes/{quote_id}", entity_should_exist=False
    )
    return None if data is None else parse_quote(data)

772 

773 

async def get_wrong_quote(
    quote_id: int, author_id: int, use_cache: bool = True
) -> WrongQuote | None:
    """
    Get a wrong quote with a quote id and an author id.

    With ``use_cache`` a cached wrong quote is returned as is; if only the
    quote and the author are cached, an unsaved placeholder with rating 0
    and an unknown id is returned.  Otherwise the API is queried.
    Returns ``None`` if the API yields no result.
    """
    cached = WRONG_QUOTES_CACHE.get((quote_id, author_id))
    if cached:
        if not use_cache:
            # do not use cache, so update the wrong quote data
            return await cached.fetch_new_data()
        return cached

    # wrong quote not in cache
    if use_cache and quote_id in QUOTES_CACHE and author_id in AUTHORS_CACHE:
        # we don't need to request anything, as the wrong_quote probably has
        # no ratings just use the cached quote and author
        # pylint: disable-next=too-many-function-args
        return WrongQuote(WRONGQUOTE_UNKNOWN, quote_id, author_id, 0)

    # request the wrong quote from the API
    query = {
        "quote": str(quote_id),
        "simulate": "true",
        "author": str(author_id),
    }
    result = await make_api_request(
        "wrongquotes", query, entity_should_exist=False
    )
    if not result:
        return None
    return parse_wrong_quote(result[0])

804 

805 

async def get_rating_by_id(quote_id: int, author_id: int) -> int | None:
    """Return the rating of the wrong quote, or None if there is none."""
    wrong_quote = await get_wrong_quote(quote_id, author_id)
    return wrong_quote.rating if wrong_quote else None

811 

812 

def get_random_quote_id() -> int:
    """Return the id of a randomly chosen cached quote."""
    cached_ids = tuple(QUOTES_CACHE)
    return random.choice(cached_ids)

816 

817 

def get_random_author_id() -> int:
    """Return the id of a randomly chosen cached author."""
    cached_ids = tuple(AUTHORS_CACHE)
    return random.choice(cached_ids)

821 

822 

def get_random_id() -> tuple[int, int]:
    """Return a random wrong quote id as tuple(quote_id, author_id)."""
    quote_id = get_random_quote_id()
    author_id = get_random_author_id()
    return quote_id, author_id

829 

830 

async def create_wq_and_vote(
    vote: Literal[-1, 1],
    quote_id: int,
    author_id: int,
    contributed_by: str,
    fast: bool = False,
) -> WrongQuote:
    """
    Vote for the wrong_quote with the API.

    If the wrong_quote doesn't exist yet, create it.

    Raises ``HTTPError(404)`` if the wrong quote is marked as deleted and
    ``HTTPError(500)`` if creating it or voting for the freshly created
    wrong quote fails.
    """
    cached = WRONG_QUOTES_CACHE.get((quote_id, author_id))
    if cached:
        if cached.id != WRONGQUOTE_UNKNOWN:
            voted = await cached.vote(vote, fast)
            if voted is not None:
                return voted
        if cached.id == WRONGQUOTE_DELETED:
            raise HTTPError(404)

    # we don't know the wrong_quote_id, so we have to create the wrong_quote
    data = await make_api_request(
        "wrongquotes",
        method="POST",
        body={
            "quote": str(quote_id),
            "author": str(author_id),
            "contributed_by": contributed_by,
        },
        entity_should_exist=True,
    )
    if data is None:
        LOGGER.error(
            "Creating wrong quote (%s-%s) failed with 404", quote_id, author_id
        )
        raise HTTPError(500)

    created = parse_wrong_quote(data)
    if created.id == WRONGQUOTE_DELETED:
        raise HTTPError(404)

    voted = await created.vote(vote, lazy=True)
    if voted is None:
        LOGGER.error(
            "Voting just created wrong quote (%s) failed with 404",
            created.get_id_as_str(True),
        )
        raise HTTPError(500)
    return voted

878 

879 

class QuoteReadyCheckHandler(HTMLRequestHandler):
    """Request handler that refuses to serve before the quotes are loaded."""

    async def check_ready(self) -> None:
        """Raise ``HTTPError(503)`` while the wrong quotes cache is empty."""
        if WRONG_QUOTES_CACHE:
            return
        # should work in a few seconds, the quotes just haven't loaded yet
        self.set_header("Retry-After", "5")
        raise HTTPError(503, reason="Service available in a few seconds")

    async def prepare(self) -> None:  # noqa: D102
        await super().prepare()
        if self.request.method != "OPTIONS":
            await self.check_ready()

        # only xlsx downloads are rate limited here
        if not (  # pylint: disable=too-many-boolean-expressions
            self.settings.get("RATELIMITS")
            and self.request.method not in {"HEAD", "OPTIONS"}
            and not self.is_authorized(Permission.RATELIMITS)
            and not self.crawler
            and (
                self.request.path.endswith(".xlsx")
                or self.content_type == "application/vnd.ms-excel"
            )
        ):
            return

        if self.settings.get("UNDER_ATTACK") or not EVENT_REDIS.is_set():
            raise HTTPError(503)

        ratelimited, headers = await ratelimit(
            self.redis,
            self.redis_prefix,
            str(self.request.remote_ip),
            bucket="quotes:image:xlsx",
            max_burst=4,
            count_per_period=1,
            period=60,
            tokens=1 if self.request.method != "HEAD" else 0,
        )

        for header, value in headers.items():
            self.set_header(header, value)

        if ratelimited:
            # status 420 is used instead of 429 on April 20th
            on_april_20th = self.now.date() == date(self.now.year, 4, 20)
            status = 420 if on_april_20th else 429
            self.set_status(status)
            self.write_error(status)