Coverage for an_website/utils/base_request_handler.py: 79.508%

488 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-16 19:56 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13# pylint: disable=too-many-lines 

14 

15""" 

16The base request handler used by other modules. 

17 

18This should only contain the BaseRequestHandler class. 

19""" 

20 

21from __future__ import annotations 

22 

23import contextlib 

24import inspect 

25import logging 

26import secrets 

27import sys 

28import traceback 

29import uuid 

30from asyncio import Future 

31from base64 import b64decode 

32from collections.abc import Awaitable, Callable, Coroutine 

33from contextvars import ContextVar 

34from datetime import date, datetime, timedelta, timezone, tzinfo 

35from functools import cached_property, partial, reduce 

36from random import Random, choice as random_choice 

37from types import TracebackType 

38from typing import Any, ClassVar, Final, cast, override 

39from urllib.parse import SplitResult, urlsplit, urlunsplit 

40from zoneinfo import ZoneInfo 

41 

42import elasticapm 

43import html2text 

44import orjson as json 

45import regex 

46import tornado.web 

47import yaml 

48from accept_types import get_best_match # type: ignore[import-untyped] 

49from ansi2html import Ansi2HTMLConverter 

50from bs4 import BeautifulSoup 

51from dateutil.easter import easter 

52from elastic_transport import ApiError, TransportError 

53from elasticsearch import AsyncElasticsearch 

54from openmoji_dist import VERSION as OPENMOJI_VERSION 

55from redis.asyncio import Redis 

56from tornado.httputil import HTTPServerRequest 

57from tornado.iostream import StreamClosedError 

58from tornado.web import ( 

59 Finish, 

60 GZipContentEncoding, 

61 HTTPError, 

62 MissingArgumentError, 

63 OutputTransform, 

64) 

65 

66from .. import ( 

67 EVENT_ELASTICSEARCH, 

68 EVENT_REDIS, 

69 GH_ORG_URL, 

70 GH_PAGES_URL, 

71 GH_REPO_URL, 

72 NAME, 

73 ORJSON_OPTIONS, 

74 pytest_is_running, 

75) 

76from .decorators import is_authorized 

77from .options import Options 

78from .static_file_handling import FILE_HASHES_DICT, fix_static_path 

79from .themes import THEMES 

80from .utils import ( 

81 ModuleInfo, 

82 Permission, 

83 add_args_to_url, 

84 ansi_replace, 

85 apply, 

86 backspace_replace, 

87 bool_to_str, 

88 emoji2html, 

89 geoip, 

90 hash_bytes, 

91 is_prime, 

92 ratelimit, 

93 str_to_bool, 

94) 

95 

# Logger shared by everything in this module.
LOGGER: Final = logging.getLogger(__name__)

# Media types outside of "text/*" that are nevertheless handled as text
# when a response is finished.
TEXT_CONTENT_TYPES: Final[set[str]] = {
    "application/yaml",
    "application/xml",
    "application/x-ndjson",
    "application/vnd.asozial.dynload+json",
    "application/json",
    "application/javascript",
}

# Context variable that carries the request currently being executed.
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")

108 

109 

class _RequestHandler(tornado.web.RequestHandler):
    """Base for tornado request handlers."""

    @override
    async def _execute(
        self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
    ) -> None:
        # Publish the request through the context variable before tornado
        # starts executing the handler.
        request_ctx_var.set(self.request)
        return await super()._execute(transforms, *args, **kwargs)

    # pylint: disable-next=protected-access
    _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__

    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        # Intentionally a no-op.
        return None

    data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__

    @override
    def log_exception(
        self,
        typ: None | type[BaseException],
        value: None | BaseException,
        tb: None | TracebackType,
    ) -> None:
        # HTTP errors keep tornado's own logging behaviour.
        if isinstance(value, HTTPError):
            super().log_exception(typ, value, tb)
            return

        # A closed stream is only worth a debug message, everything else
        # is reported as an error.
        if typ is StreamClosedError:
            log, message = LOGGER.debug, "Stream closed %s"
        else:
            log, message = LOGGER.error, "Uncaught exception %s"

        log(
            message,
            self._request_summary(),
            exc_info=(typ, value, tb),  # type: ignore[arg-type]
        )

    log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__

154 

155 

class BaseRequestHandler(_RequestHandler):
    """The base request handler used by every page and API."""

    # pylint: disable=too-many-instance-attributes, too-many-public-methods

    # Path of the Elastic RUM bundle; the minified build is used unless
    # Python runs in development mode.
    ELASTIC_RUM_URL: ClassVar[str] = (
        "/@elastic/apm-rum@5.12.0/dist/bundles/elastic-apm-rum.umd"
        + ("" if sys.flags.dev_mode else ".min")
        + ".js"
    )

    # Whether compute_etag() should produce an ETag.
    COMPUTE_ETAG: ClassVar[bool] = True
    # Whether gzip output transforms stay enabled (checked in prepare()).
    ALLOW_COMPRESSION: ClassVar[bool] = True
    # Largest accepted request body in bytes; None disables the check.
    MAX_BODY_SIZE: ClassVar[None | int] = None
    # Methods reported by get_allowed_methods() (OPTIONS is always added).
    ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
    # Content types offered during Accept header negotiation.
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()

    module_info: ModuleInfo
    # info about page, can be overridden in module_info
    title: str = "Das Asoziale Netzwerk"
    short_title: str = "Asoziales Netzwerk"
    description: str = "Die tolle Webseite des Asozialen Netzwerks"

    # Set to True by render(); finish() only converts rendered output.
    used_render: bool = False

    # Features enabled through origin_trial() for this response.
    active_origin_trials: set[str]
    # Negotiated content type, set by handle_accept_header().
    content_type: None | str = None
    apm_script: None | str
    # Set in prepare() when the User-Agent contains the crawler secret.
    crawler: bool = False
    # CSP nonce generated by set_csp_header().
    nonce: str

185 

def _finish(
    self, chunk: None | str | bytes | dict[str, Any] = None
) -> Future[None]:
    """
    Write the last chunk and finish the response.

    For textual content types a trailing newline is appended if the
    buffered output does not already end with one.

    Raises RuntimeError if the response has already been finished.
    """
    if self._finished:
        raise RuntimeError("finish() called twice")

    if chunk is not None:
        self.write(chunk)

    media_type = self.content_type
    if media_type and (
        media_type.startswith("text/")
        or media_type.endswith(("+xml", "+json"))
        or media_type in TEXT_CONTENT_TYPES
    ):
        buffered = self._write_buffer
        # only touch the output if there is any and it lacks the newline
        if buffered and not buffered[-1].endswith(b"\n"):
            self.write(b"\n")

    return super().finish()

208 

@property
def apm_client(self) -> None | elasticapm.Client:
    """Return the ``CLIENT`` entry of the ``ELASTIC_APM`` setting, if any."""
    apm_settings = self.settings.get("ELASTIC_APM", {})
    return apm_settings.get("CLIENT")  # type: ignore[no-any-return]

213 

@property
def apm_enabled(self) -> bool:
    """Return whether ``ELASTIC_APM`` has a truthy ``ENABLED`` entry."""
    apm_settings = self.settings.get("ELASTIC_APM", {})
    return bool(apm_settings.get("ENABLED"))

218 

219 @override 

220 def compute_etag(self) -> None | str: 

221 """Compute ETag with Base85 encoding.""" 

222 if not self.COMPUTE_ETAG: 

223 return None 

224 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907 

225 

226 @override 

227 def data_received( # noqa: D102 

228 self, chunk: bytes 

229 ) -> None | Awaitable[None]: 

230 pass 

231 

232 @override 

233 def decode_argument( # noqa: D102 

234 self, value: bytes, name: str | None = None 

235 ) -> str: 

236 try: 

237 return value.decode("UTF-8", "replace") 

238 except UnicodeDecodeError as exc: 

239 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}" 

240 LOGGER.exception(err_msg, exc_info=exc) 

241 raise HTTPError(400, err_msg) from exc 

242 

@property
def dump(self) -> Callable[[Any], str | bytes]:
    """
    Get the function for dumping the output.

    YAML responses are dumped with ``yaml.dump``, JSON responses with
    ``json.dumps``; any other content type gets a function that returns
    its argument unchanged.
    """
    content_type = self.content_type

    if content_type == "application/yaml":
        # (day, month) == (1, 4): on the first of April the YAML
        # response is produced by the JSON dumper instead.
        if self.now.timetuple()[2:0:-1] != (1, 4):

            def dump_yaml(spam: Any) -> Any:
                # the width argument is looked up on every call
                return yaml.dump(
                    spam,
                    width=self.get_int_argument("yaml_width", 80, min_=80),
                )

            return dump_yaml
        use_json = True
    else:
        use_json = content_type in {
            "application/json",
            "application/vnd.asozial.dynload+json",
        }

    if not use_json:
        return lambda spam: spam

    option = ORJSON_OPTIONS
    if self.get_bool_argument("pretty", False):
        option |= json.OPT_INDENT_2

    def dump_json(spam: Any) -> Any:
        return json.dumps(spam, option=option)

    return dump_json

267 

@property
def elasticsearch(self) -> AsyncElasticsearch:
    """
    Get the Elasticsearch client from the settings.

    The value of the ``ELASTICSEARCH`` setting is returned as is, so
    this is None if the setting is missing.
    """
    client = self.settings.get("ELASTICSEARCH")
    return cast(AsyncElasticsearch, client)

276 

@property
def elasticsearch_prefix(self) -> str:
    """Get the ``ELASTICSEARCH_PREFIX`` setting, defaulting to ``NAME``."""
    prefix = self.settings.get("ELASTICSEARCH_PREFIX", NAME)
    return prefix  # type: ignore[no-any-return]

283 

@override
def finish(  # noqa: D102
    self, chunk: None | str | bytes | dict[Any, Any] = None
) -> Future[None]:
    content_type = self.content_type
    wants_dynload = content_type == "application/vnd.asozial.dynload+json"
    wants_plain_text = content_type == "text/plain"
    wants_markdown = content_type == "text/markdown"

    # Only rendered HTML that was requested in one of the alternative
    # formats gets converted, everything else is passed through.
    convertible = (
        isinstance(chunk, bytes | str)
        and content_type != "text/html"
        and self.used_render
        and (wants_dynload or wants_plain_text or wants_markdown)
    )
    if not convertible:
        return self._finish(chunk)

    html = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk

    if wants_markdown:
        markdown = html2text.html2text(html, self.request.full_url())
        return self._finish(f"# {self.title}\n\n" + markdown.strip())

    soup = BeautifulSoup(html, features="lxml")

    if wants_plain_text:
        return self._finish(soup.get_text("\n", True))

    # dynload response: the parts of the page as a dictionary
    page: dict[str, Any] = {
        "url": self.fix_url(),
        "title": self.title,
    }
    if self.title != self.short_title:
        page["short_title"] = self.short_title
    else:
        page["short_title"] = None

    main_element = soup.find_all(name="main")[0]
    page["body"] = "".join(
        str(element) for element in main_element.contents
    ).strip()

    # scripts, stylesheets and css are only collected if there is a head
    if soup.head:
        page["scripts"] = [
            {"script": script.string} | script.attrs
            for script in soup.find_all("script")
        ]
        page["stylesheets"] = [
            stylesheet.get("href").strip()
            for stylesheet in soup.find_all("link", rel="stylesheet")
        ]
        page["css"] = "\n".join(
            style.string for style in soup.find_all("style")
        )
    else:
        page["scripts"] = []
        page["stylesheets"] = []
        page["css"] = ""

    return self._finish(page)

finish.__doc__ = _RequestHandler.finish.__doc__

349 

def finish_dict(self, **kwargs: Any) -> Future[None]:
    """Finish the request with the keyword arguments as a dictionary."""
    payload: dict[str, Any] = kwargs
    return self.finish(payload)

353 

def fix_url(
    self,
    url: None | str | SplitResult = None,
    new_path: None | str = None,
    **query_args: None | str | bool | float,
) -> str:
    """
    Fix a URL and return it.

    Without a URL the URL of the current request is used. URLs pointing
    to another host are returned unchanged, or wrapped in a link to
    ``/redirect`` if the user wants to be asked before leaving and the
    redirect module is loaded. URLs of this website are rebuilt with the
    protocol and host of the current request and get the user settings
    added as query arguments.
    """
    target = self.request.full_url() if url is None else url
    parts = urlsplit(target) if isinstance(target, str) else target

    if parts.netloc and parts.netloc.lower() != self.request.host.lower():
        # link to another website
        ask_first = (
            self.user_settings.ask_before_leaving
            and self.settings.get("REDIRECT_MODULE_LOADED")
        )
        if not ask_first:
            return parts.geturl()
        path = "/redirect"
        query_args["to"] = parts.geturl()
        parts = urlsplit(self.request.full_url())
    elif new_path is None:
        path = parts.path
    else:
        path = new_path

    # normalize the path: one leading slash, no trailing slash, lower case
    path = f"/{path.strip('/')}".lower()
    if path == "/lolwut":
        path = path.upper()

    if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
        # file URLs: every option name is set to None
        query_args.update(
            dict.fromkeys(self.user_settings.iter_option_names())
        )
    else:
        current = self.user_settings.as_dict_with_str_values()
        for key, value in current.items():
            query_args.setdefault(key, value)
        stored = self.user_settings.as_dict_with_str_values(
            include_query_argument=False,
            include_body_argument=self.request.path == "/einstellungen"
            and self.get_bool_argument("save_in_cookie", False),
        )
        # values equal to the ones available without the query argument
        # are replaced with None
        for key, value in stored.items():
            if value == query_args[key]:
                query_args[key] = None

    rebuilt = urlunsplit(
        (
            self.request.protocol,
            self.request.host,
            "" if path == "/" else path,
            parts.query,
            parts.fragment,
        )
    )
    return add_args_to_url(rebuilt, **query_args)

414 

def geoip(
    self,
    ip: None | str = None,
    database: str = geoip.__defaults__[0],  # type: ignore[index]
    *,
    allow_fallback: bool = True,
) -> Coroutine[None, None, None | dict[str, Any]]:
    """
    Get GeoIP information.

    Without an IP address the remote IP of the request is used. The
    Elasticsearch client and ``allow_fallback`` are only passed on while
    the Elasticsearch event is set.
    """
    address = ip or self.request.remote_ip
    if EVENT_ELASTICSEARCH.is_set():
        return geoip(
            address,
            database,
            self.elasticsearch,
            allow_fallback=allow_fallback,
        )
    return geoip(address, database)

430 

@classmethod
def get_allowed_methods(cls) -> list[str]:
    """
    Get the allowed methods as a sorted list.

    OPTIONS is always included; HEAD is added if GET is allowed and
    ``supports_head()`` returns a true value.
    """
    allowed = set(cls.ALLOWED_METHODS)
    allowed.add("OPTIONS")
    if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
        allowed.add("HEAD")
    return sorted(allowed)

438 

def get_bool_argument(
    self,
    name: str,
    default: None | bool = None,
) -> bool:
    """
    Get an argument parsed as boolean.

    Without a default the argument is required and a value that
    ``str_to_bool`` rejects with a ValueError results in an HTTP 400
    error.
    """
    if default is None:
        raw = str(self.get_argument(name))
        try:
            return str_to_bool(raw)
        except ValueError as err:
            raise HTTPError(400, f"{raw} is not a boolean") from err

    return str_to_bool(self.get_argument(name, ""), default)

452 

def get_display_theme(self) -> str:
    """
    Get the theme currently displayed.

    The default theme becomes "christmas" in December. The random
    themes pick one of ``THEMES``, leaving out the random themes
    themselves, "christmas" outside of December and, for "random_dark",
    the themes "light", "light_blue" and "fun".
    """
    chosen = self.user_settings.theme

    if chosen == "default" and self.now.month == 12:
        return "christmas"

    if chosen.split("_")[0] != "random":
        return chosen

    excluded = {"random", "random_dark"}
    if self.now.month != 12:
        excluded.add("christmas")
    if chosen == "random_dark":
        excluded.update(("light", "light_blue", "fun"))

    candidates = tuple(theme for theme in THEMES if theme not in excluded)
    return random_choice(candidates)  # nosec: B311

474 

def get_error_message(self, **kwargs: Any) -> str:
    """
    Get the error message and return it.

    For exceptions that are not HTTP errors the formatted exception is
    returned; the full traceback is only included if the
    serve_traceback setting is true or the request is authorized to see
    tracebacks. For missing arguments the log message of the exception
    is returned, in every other case the reason of the response.
    """
    if "exc_info" not in kwargs:
        return str(self._reason)

    exc_info = kwargs["exc_info"]
    exc_type = exc_info[0]

    if not issubclass(exc_type, HTTPError):
        if self.settings.get("serve_traceback") or self.is_authorized(
            Permission.TRACEBACK
        ):
            lines = traceback.format_exception(*exc_info)
        else:
            lines = traceback.format_exception_only(*exc_info[:2])
        return "".join(lines).strip()

    if issubclass(exc_type, MissingArgumentError):
        return cast(str, exc_info[1].log_message)

    return str(self._reason)

499 

def get_error_page_description(self, status_code: int) -> str:
    """
    Get the description for the error page.

    Raises ValueError for status codes outside of 100 to 599.
    """
    # https://developer.mozilla.org/docs/Web/HTTP/Status
    # status codes with a description of their own
    if status_code == 404:
        return f"{self.request.path} wurde nicht gefunden."
    if status_code == 451:
        return "Hier wäre bestimmt geiler Scheiß."

    descriptions = (
        (100, 199, "Hier gibt es eine total wichtige Information."),
        (200, 299, "Hier ist alles super! 🎶🎶"),
        (300, 399, "Eine Umleitung ist eingerichtet."),
        (400, 499, "Ein Client-Fehler ist aufgetreten."),
        (500, 599, "Ein Server-Fehler ist aufgetreten."),
    )
    for lowest, highest, description in descriptions:
        if lowest <= status_code <= highest:
            return description

    raise ValueError(
        f"{status_code} is not a valid HTTP response status code."
    )

521 

def get_int_argument(
    self,
    name: str,
    default: None | int = None,
    *,
    max_: None | int = None,
    min_: None | int = None,
) -> int:
    """
    Get an argument parsed as integer.

    The value is parsed with base 0, so prefixes like ``0x`` are
    understood. Without a default the argument is required and an
    invalid value results in an HTTP 400 error; with a default, missing,
    empty and invalid values fall back to it. The result is limited to
    ``max_`` first and to ``min_`` afterwards.
    """
    if default is None:
        raw = self.get_argument(name)
        try:
            value = int(raw, base=0)
        except ValueError as err:
            raise HTTPError(400, f"{raw} is not an integer") from err
    else:
        value = default
        if self.get_argument(name, ""):
            # an unparsable value silently keeps the default
            with contextlib.suppress(ValueError):
                value = int(self.get_argument(name), base=0)

    if max_ is not None and value > max_:
        value = max_
    if min_ is not None and value < min_:
        value = min_

    return value

551 

def get_module_infos(self) -> tuple[ModuleInfo, ...]:
    """Get the ``MODULE_INFOS`` setting or an empty tuple if it is unset."""
    infos = self.settings.get("MODULE_INFOS")
    return infos if infos else ()

555 

def get_reporting_api_endpoint(self) -> None | str:
    """
    Get the endpoint for the Reporting API™️.

    Returns None if reporting is disabled. An endpoint starting with a
    slash is prefixed with the protocol and host of the request; any
    other value of the setting is returned unchanged.
    """
    if not self.settings.get("REPORTING"):
        return None

    endpoint = self.settings.get("REPORTING_ENDPOINT")
    if endpoint and endpoint.startswith("/"):
        return f"{self.request.protocol}://{self.request.host}{endpoint}"

    return endpoint

566 

@override
def get_template_namespace(self) -> dict[str, Any]:
    """
    Extend the default template namespace and return it.

    Besides the user settings this adds the values and helpers that are
    used by the templates, like title, description, nonce and fix_url.
    """
    namespace = super().get_template_namespace()

    convert_ansi = partial(
        Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
    )
    namespace.update(self.user_settings.as_dict())

    # /LOLWUT is the only path with an upper case canonical URL
    full_url = self.request.full_url()
    if self.request.path.upper().startswith("/LOLWUT"):
        cased_url = full_url.upper()
    else:
        cased_url = full_url.lower()

    openmoji = self.user_settings.openmoji
    render_emoji: Callable[[str], str]
    if openmoji == "img":
        render_emoji = emoji2html
    elif openmoji:
        render_emoji = lambda emoji: f'<span class="openmoji">{emoji}</span>'  # noqa: E731
    else:
        render_emoji = lambda emoji: emoji  # noqa: E731

    keywords = "Asoziales Netzwerk, Känguru-Chroniken"
    if self.module_info:  # type: ignore[truthy-bool]
        keywords += (
            f", {self.module_info.get_keywords_as_str(self.request.path)}"
        )

    namespace.update(
        {
            # the three functions are combined with reduce and apply
            "ansi2html": partial(
                reduce, apply, (convert_ansi, ansi_replace, backspace_replace)
            ),
            "apm_script": (
                self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
                if self.apm_enabled
                else None
            ),
            "as_html": self.content_type == "text/html",
            # true on the first of April or if the "c" cookie says so
            "c": self.now.date() == date(self.now.year, 4, 1)
            or str_to_bool(self.get_cookie("c", "f") or "f", False),
            "canonical_url": self.fix_url(cased_url).split("?")[0],
            "description": self.description,
            "display_theme": self.get_display_theme(),
            "elastic_rum_url": self.ELASTIC_RUM_URL,
            "fix_static": lambda path: self.fix_url(fix_static_path(path)),
            "fix_url": self.fix_url,
            "emoji2html": render_emoji,
            "form_appendix": self.user_settings.get_form_appendix(),
            "GH_ORG_URL": GH_ORG_URL,
            "GH_PAGES_URL": GH_PAGES_URL,
            "GH_REPO_URL": GH_REPO_URL,
            "keywords": keywords,
            "lang": "de",  # TODO: add language support
            "nonce": self.nonce,
            "now": self.now,
            "openmoji_version": OPENMOJI_VERSION,
            "settings": self.settings,
            "short_title": self.short_title,
            "testing": pytest_is_running(),
            "title": self.title,
            # true on the day returned by easter() and the day after it
            "🥚": timedelta()
            <= self.now.date() - easter(self.now.year)
            < timedelta(days=2),
            "🦘": is_prime(self.now.microsecond),
        }
    )
    return namespace

639 

async def get_time(self) -> datetime:
    """
    Get the start time of the request in the users' timezone.

    The timezone is taken from the GeoIP information. UTC is used if
    the lookup fails with an Elasticsearch error or does not contain a
    timezone.
    """
    geo_info = None
    try:
        geo_info = await self.geoip()
    except (ApiError, TransportError):
        LOGGER.exception("Elasticsearch request failed")
        client = self.apm_client
        if client:
            client.capture_exception()  # type: ignore[no-untyped-call]

    zone: tzinfo
    if geo_info and "timezone" in geo_info:
        zone = ZoneInfo(geo_info["timezone"])
    else:
        zone = timezone.utc

    return datetime.fromtimestamp(
        self.request._start_time, tz=zone  # pylint: disable=protected-access
    )

655 

def get_user_id(self) -> str:
    """
    Get the user id saved in the cookie or create one.

    Cookies up to 90 days old are accepted. If there is no cookie that
    is at most 30 days old, the id is written to the cookie again.
    """
    stored = self.get_secure_cookie(
        "user_id",
        max_age_days=90,
        min_version=2,
    )

    if stored:
        user_id = stored.decode("UTF-8")
    else:
        user_id = str(uuid.uuid4())

    # save it in cookie or reset expiry date
    recent = self.get_secure_cookie(
        "user_id", max_age_days=30, min_version=2
    )
    if not recent:
        self.set_secure_cookie(
            "user_id",
            user_id,
            expires_days=90,
            path="/",
            samesite="Strict",
        )

    return user_id

678 

def handle_accept_header(  # pylint: disable=inconsistent-return-statements
    self, possible_content_types: tuple[str, ...], strict: bool = True
) -> None:
    """
    Handle the Accept header and set `self.content_type`.

    Nothing happens without possible content types. If none of them
    matches the Accept header, the request is answered with
    handle_not_acceptable() in strict mode; otherwise the first
    possible content type is used.
    """
    if not possible_content_types:
        return

    accepted = self.request.headers.get("Accept") or "*/*"
    best_match = get_best_match(accepted, possible_content_types)

    if best_match is None:
        if strict:
            return self.handle_not_acceptable(possible_content_types)
        best_match = possible_content_types[0]

    self.content_type = best_match
    self.set_content_type_header()

695 

def handle_not_acceptable(
    self, possible_content_types: tuple[str, ...]
) -> None:
    """
    Only call this if we cannot respect the Accept header.

    Answers with status 406 and the possible content types, one per
    line, by raising Finish.
    """
    self.clear_header("Content-Type")
    self.set_status(406)
    listing = "\n".join(possible_content_types) + "\n"
    raise Finish(listing)

703 

def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
    """
    Handle HEAD requests by calling get() with ``head=True``.

    Raises HTTP 405 if get() is the one defined in tornado.web and
    HTTP 501 if supports_head() returns a false value.
    """
    get_is_inherited = self.get.__module__ == "tornado.web"
    if get_is_inherited:
        raise HTTPError(405)
    if not self.supports_head():
        raise HTTPError(501)

    return self.get(*args, **{**kwargs, "head": True})

713 

714 @override 

715 def initialize( 

716 self, 

717 *, 

718 module_info: ModuleInfo, 

719 # default is true, because then empty args dicts are 

720 # enough to specify that the defaults should be used 

721 default_title: bool = True, 

722 default_description: bool = True, 

723 ) -> None: 

724 """ 

725 Get title and description from the kwargs. 

726 

727 If title and description are present in the kwargs, 

728 then they override self.title and self.description. 

729 """ 

730 self.module_info = module_info 

731 if not default_title: 

732 page_info = self.module_info.get_page_info(self.request.path) 

733 self.title = page_info.name 

734 self.short_title = page_info.short_name or self.title 

735 if not default_description: 

736 self.description = self.module_info.get_page_info( 

737 self.request.path 

738 ).description 

739 

def is_authorized(
    self, permission: Permission, allow_cookie_auth: bool = True
) -> bool | None:
    """Check whether the request is authorized for the given permission."""
    authorized = is_authorized(self, permission, allow_cookie_auth)
    return authorized

745 

@cached_property
def now(self) -> datetime:
    """
    Get the start time of the request in UTC.

    This is only the fallback for the value assigned in prepare().
    Using it raises an AssertionError while pytest is running and logs
    an error for requests with a supported method.
    """
    # pylint: disable=method-hidden
    if pytest_is_running():
        raise AssertionError("Now accessed before it was set")
    if self.request.method in self.SUPPORTED_METHODS:
        LOGGER.error("Now accessed before it was set", stacklevel=3)

    started = self.request._start_time  # pylint: disable=protected-access
    return datetime.fromtimestamp(started, tz=timezone.utc)

758 

759 @override 

760 async def options(self, *args: Any, **kwargs: Any) -> None: 

761 """Handle OPTIONS requests.""" 

762 # pylint: disable=unused-argument 

763 self.set_header("Allow", ", ".join(self.get_allowed_methods())) 

764 self.set_status(204) 

765 await self.finish() 

766 

def origin_trial(self, token: bytes | str) -> bool:
    """
    Enable an experimental feature.

    The token is Base64 decoded and everything after the first 69 bytes
    is parsed as JSON. The feature is enabled by adding the
    Origin-Trial header if the token has not expired and its origin
    matches the URL of the request. Returns whether the feature is
    active.
    """
    # pylint: disable=protected-access
    # NOTE(review): the 69 skipped bytes are presumably the binary
    # header of the token -- confirm against the token format
    payload = json.loads(b64decode(token)[69:])
    feature = payload["feature"]

    if feature in self.active_origin_trials:
        return True

    origin = urlsplit(payload["origin"])
    url = urlsplit(self.request.full_url())
    if url.port is None and url.scheme in {"http", "https"}:
        # make the default port explicit for the comparison
        default_port = 443 if url.scheme == "https" else 80
        url = url._replace(netloc=f"{url.hostname}:{default_port}")

    if self.request._start_time > payload["expiry"]:
        return False
    if url.scheme != origin.scheme:
        return False
    if url.netloc != origin.netloc:
        matches_subdomain = payload.get("isSubdomain") and url.netloc.endswith(
            f".{origin.netloc}"
        )
        if not matches_subdomain:
            return False

    self.add_header("Origin-Trial", token)
    self.active_origin_trials.add(feature)
    return True

791 

792 @override 

793 async def prepare(self) -> None: 

794 """Check authorization and call self.ratelimit().""" 

795 # pylint: disable=invalid-overridden-method 

796 self.now = await self.get_time() 

797 

798 if not self.ALLOW_COMPRESSION: 

799 for transform in self._transforms: 

800 if isinstance(transform, GZipContentEncoding): 

801 # pylint: disable=protected-access 

802 transform._gzipping = False 

803 

804 if crawler_secret := self.settings.get("CRAWLER_SECRET"): 

805 self.crawler = crawler_secret in self.request.headers.get( 

806 "User-Agent", "" 

807 ) 

808 

809 self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES) 

810 

811 if ( 

812 self.request.method in {"GET", "HEAD"} 

813 and self.redirect_to_canonical_domain() 

814 ): 

815 return 

816 

817 if self.request.method == "GET" and ( 

818 days := Random(self.now.timestamp()).randint(0, 31337) 

819 ) in { 

820 69, 

821 420, 

822 1337, 

823 31337, 

824 }: 

825 self.set_cookie("c", "s", expires_days=days / 24, path="/") 

826 

827 if self.request.method != "OPTIONS": 

828 if ( 

829 self.MAX_BODY_SIZE is not None 

830 and len(self.request.body) > self.MAX_BODY_SIZE 

831 ): 

832 LOGGER.warning( 

833 "%s > MAX_BODY_SIZE (%s)", 

834 len(self.request.body), 

835 self.MAX_BODY_SIZE, 

836 ) 

837 raise HTTPError(413) 

838 

839 if not await self.ratelimit(True): 

840 await self.ratelimit() 

841 

async def ratelimit(self, global_ratelimit: bool = False) -> bool:
    """
    Take b1nzy to space using Redis.

    Applies either the global ratelimit or the one configured with the
    ``RATELIMIT_{method}_*`` attributes of the handler and returns
    whether the request got ratelimited. In that case the error page
    has already been written.

    Raises HTTP 503 if ratelimits are enabled while Redis is not
    available.
    """
    exempt = (
        not self.settings.get("RATELIMITS")
        or self.request.method == "OPTIONS"
        or self.is_authorized(Permission.RATELIMITS)
        or self.crawler
    )
    if exempt:
        return False

    if not EVENT_REDIS.is_set():
        LOGGER.warning(
            (
                "Ratelimits are enabled, but Redis is not available. "
                "This can happen shortly after starting the website."
            ),
        )
        raise HTTPError(503)

    limits: dict[str, Any]
    if global_ratelimit:
        limits = {
            "bucket": None,
            "max_burst": 99,  # limit = 100
            "count_per_period": 20,  # 20 requests per second
            "period": 1,
            "tokens": 10 if self.settings.get("UNDER_ATTACK") else 1,
        }
    else:
        # HEAD requests share the configuration of GET requests
        method = (
            "GET" if self.request.method == "HEAD" else self.request.method
        )
        limit = getattr(self, f"RATELIMIT_{method}_LIMIT", 0)
        if not limit:
            return False
        limits = {
            "bucket": getattr(
                self,
                f"RATELIMIT_{method}_BUCKET",
                self.__class__.__name__.lower(),
            ),
            "max_burst": limit - 1,
            # request count per period
            "count_per_period": getattr(
                self, f"RATELIMIT_{method}_COUNT_PER_PERIOD", 30
            ),
            # period in seconds
            "period": getattr(self, f"RATELIMIT_{method}_PERIOD", 60),
            "tokens": 1 if self.request.method != "HEAD" else 0,
        }

    ratelimited, headers = await ratelimit(
        self.redis,
        self.redis_prefix,
        str(self.request.remote_ip),
        **limits,
    )

    for header, value in headers.items():
        self.set_header(header, value)

    if ratelimited:
        # on the 20th of April the status code is 420 instead of 429
        on_april_20th = self.now.date() == date(self.now.year, 4, 20)
        status = 420 if on_april_20th else 429
        self.set_status(status)
        self.write_error(status)

    return ratelimited

911 

def redirect_to_canonical_domain(self) -> bool:
    """
    Redirect to the canonical domain.

    Returns False without redirecting if no domain is configured, the
    request has no Host header, the host already is the canonical
    domain or ends with ".onion" or ".i2p", or the path consists only
    of characters from U+2800 to U+28FF. Otherwise a permanent redirect
    that keeps the port of the Host header is sent and True is returned.
    """
    domain = self.settings.get("DOMAIN")
    if not domain or not self.request.headers.get("Host"):
        return False

    host_name = self.request.host_name
    if host_name == domain or host_name.endswith((".onion", ".i2p")):
        return False

    if regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path):
        return False

    port = urlsplit(f"//{self.request.headers['Host']}").port
    netloc = f"{domain}:{port}" if port else domain
    target = urlsplit(self.request.full_url())._replace(netloc=netloc)
    self.redirect(target.geturl(), permanent=True)
    return True

930 

@property
def redis(self) -> Redis[str]:
    """
    Get the Redis client from the settings.

    The value of the ``REDIS`` setting is returned as is, so this is
    None if the setting is missing.
    """
    client = self.settings.get("REDIS")
    return cast("Redis[str]", client)

939 

@property
def redis_prefix(self) -> str:
    """Get the ``REDIS_PREFIX`` setting, defaulting to ``NAME``."""
    prefix = self.settings.get("REDIS_PREFIX", NAME)
    return prefix  # type: ignore[no-any-return]

946 

@override
def render(  # noqa: D102
    self, template_name: str, **kwargs: Any
) -> Future[None]:
    # remember that the output comes from a template, finish() relies
    # on this flag
    self.used_render = True
    rendering = super().render(template_name, **kwargs)
    return rendering

render.__doc__ = _RequestHandler.render.__doc__

955 

def set_content_type_header(self) -> None:
    """
    Set the Content-Type header based on `self.content_type`.

    Text types get the UTF-8 charset appended. Without a content type
    the header is left untouched.
    """
    content_type = self.content_type

    if str(content_type).startswith("text/"):  # RFC 2616 (3.7.1)
        header_value = f"{content_type};charset=utf-8"
    elif content_type is not None:
        header_value = content_type
    else:
        return

    self.set_header("Content-Type", header_value)

964 

@override
def set_cookie(  # noqa: D102  # pylint: disable=too-many-arguments
    self,
    name: str,
    value: str | bytes,
    domain: None | str = None,
    expires: None | float | tuple[int, ...] | datetime = None,
    path: str = "/",
    expires_days: None | float = 400,  # changed
    **kwargs: Any,
) -> None:
    # default for same site should be strict
    kwargs.setdefault("samesite", "Strict")

    super().set_cookie(
        name,
        value,
        domain,
        expires,
        path,
        expires_days,
        **kwargs,
    )

set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__

991 

def set_csp_header(self) -> None:
    """
    Set the Content-Security-Policy header.

    A new nonce is generated and stored in `self.nonce`. If APM is
    enabled, the hash of its inline script and the URL its RUM agent
    connects to are allowed as well.
    """
    self.nonce = secrets.token_urlsafe(16)

    script_src = ["'self'", f"'nonce-{self.nonce}'"]
    connect_src = ["'self'"]

    if self.apm_enabled:
        apm_settings = self.settings["ELASTIC_APM"]
        if "INLINE_SCRIPT_HASH" in apm_settings:
            script_src.append(f"'sha256-{apm_settings['INLINE_SCRIPT_HASH']}'")
            # for browsers that don't support hash
            script_src.append("'unsafe-inline'")
        if "SERVER_URL" in apm_settings:
            rum_server_url = apm_settings.get("RUM_SERVER_URL")
            if rum_server_url:
                # the RUM agent needs to connect to rum_server_url
                connect_src.append(rum_server_url)
            elif rum_server_url is None:
                # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
                connect_src.append(apm_settings["SERVER_URL"])

    # fix for older browsers
    websocket_scheme = "wss" if self.request.protocol == "https" else "ws"
    connect_src.append(f"{websocket_scheme}://{self.request.host}")

    directives = [
        "default-src 'self';",
        f"script-src {' '.join(script_src)};",
        f"connect-src {' '.join(connect_src)};",
        "style-src 'self' 'unsafe-inline';",
        "img-src 'self' https://img.zeit.de https://github.asozial.org;",
        "frame-ancestors 'self';",
        "sandbox allow-downloads allow-same-origin allow-modals"
        " allow-popups-to-escape-sandbox allow-scripts allow-popups"
        " allow-top-navigation-by-user-activation allow-forms;",
        "report-to default;",
    ]
    if self.settings.get("REPORTING"):
        directives.append(f"report-uri {self.get_reporting_api_endpoint()};")

    self.set_header("Content-Security-Policy", "".join(directives))

1043 

1044 @override 

1045 def set_default_headers(self) -> None: 

1046 """Set default headers.""" 

# Installs the site-wide default response headers.
# NOTE(review): this listing has lost its indentation, so which statements
# belong to each `if` below cannot be read off the text -- confirm the
# nesting against the real file before editing.
# The Content-Security-Policy is produced by its own method.
1047 self.set_csp_header() 

# Start every response with an empty set of active origin trials.
1048 self.active_origin_trials = set() 

# Reporting API: when REPORTING is configured, publish the endpoint returned
# by get_reporting_api_endpoint() under the group name "default", via both
# the Reporting-Endpoints and the Report-To header (2592000 s = 30 days).
1049 if self.settings.get("REPORTING"): 

1050 endpoint = self.get_reporting_api_endpoint() 

1051 self.set_header( 

1052 "Reporting-Endpoints", 

1053 f'default="{endpoint}"', # noqa: B907 

1054 ) 

1055 self.set_header( 

1056 "Report-To", 

1057 json.dumps( 

1058 { 

1059 "group": "default", 

1060 "max_age": 2592000, 

1061 "endpoints": [{"url": endpoint}], 

1062 }, 

1063 option=ORJSON_OPTIONS, 

1064 ), 

1065 ) 

# Network Error Logging policy that reports to the "default" group.
# NOTE(review): presumably still part of the REPORTING branch -- confirm.
1066 self.set_header("NEL", '{"report_to":"default","max_age":2592000}') 

# Tell browsers not to MIME-sniff responses.
1067 self.set_header("X-Content-Type-Options", "nosniff") 

# CORS: any origin and any request header are allowed, the allowed methods
# come from get_allowed_methods(); Max-Age 7200 s = 2 hours.
1068 self.set_header("Access-Control-Max-Age", "7200") 

1069 self.set_header("Access-Control-Allow-Origin", "*") 

1070 self.set_header("Access-Control-Allow-Headers", "*") 

1071 self.set_header( 

1072 "Access-Control-Allow-Methods", 

1073 ", ".join(self.get_allowed_methods()), 

1074 ) 

1075 self.set_header("Cross-Origin-Resource-Policy", "cross-origin") 

# Every feature listed here gets an empty allowlist `()`, i.e. is switched off.
1076 self.set_header( 

1077 "Permissions-Policy", 

1078 "browsing-topics=()," 

1079 "identity-credentials-get=()," 

1080 "join-ad-interest-group=()," 

1081 "private-state-token-issuance=()," 

1082 "private-state-token-redemption=()," 

1083 "run-ad-auction=()", 

1084 ) 

1085 self.set_header("Referrer-Policy", "same-origin") 

# Cross-origin isolation: COOP "same-origin" plus a COEP value chosen below;
# both name the "default" reporting group.
1086 self.set_header( 

1087 "Cross-Origin-Opener-Policy", "same-origin; report-to=default" 

1088 ) 

# One hard-coded path gets COEP "credentialless", every other path gets
# "require-corp".
1089 if self.request.path == "/kaenguru-comics-alt": # TODO: improve this 

1090 self.set_header( 

1091 "Cross-Origin-Embedder-Policy", 

1092 "credentialless; report-to=default", 

1093 ) 

1094 else: 

1095 self.set_header( 

1096 "Cross-Origin-Embedder-Policy", 

1097 "require-corp; report-to=default", 

1098 ) 

# HSTS is opt-in via the HSTS setting; 63072000 s = 730 days.
1099 if self.settings.get("HSTS"): 

1100 self.set_header("Strict-Transport-Security", "max-age=63072000") 

# Advertise the onion mirror (ONION_ADDRESS + path + optional query string)
# unless the request's host name already ends with ".onion".
1101 if ( 

1102 onion_address := self.settings.get("ONION_ADDRESS") 

1103 ) and not self.request.host_name.endswith(".onion"): 

1104 self.set_header( 

1105 "Onion-Location", 

1106 onion_address 

1107 + self.request.path 

1108 + (f"?{self.request.query}" if self.request.query else ""), 

1109 ) 

# Debug aid: X-Debug, plus one X-Permission-<name> header per named
# Permission member holding the result of is_authorized() for it.
# NOTE(review): presumably the permission loop is nested under the debug
# check -- confirm.
1110 if self.settings.get("debug"): 

1111 self.set_header("X-Debug", bool_to_str(True)) 

1112 for permission in Permission: 

1113 if permission.name: 

1114 self.set_header( 

1115 f"X-Permission-{permission.name}", 

1116 bool_to_str(bool(self.is_authorized(permission))), 

1117 ) 

# Responses vary by these request headers.
1118 self.set_header("Vary", "Accept, Authorization, Cookie") 

# Hand a base64 origin-trial token to self.origin_trial().
# NOTE(review): the payload appears to be for the WebAppTabStrip feature on
# asozial.org -- verify, and check whether the token has expired.
1119 self.origin_trial( 

1120 "AtmCLo6pBk5FVvAouMNTMnuKR6qZ59kLvYSyVFU54oq7wbRmx1cx1FhR+FivJqRPEeJAIEHXlM6L" 

1121 "hH7UcETrWw4AAABmeyJvcmlnaW4iOiJodHRwczovL2Fzb3ppYWwub3JnOjQ0MyIsImZlYXR1cmUi" 

1122 "OiJXZWJBcHBUYWJTdHJpcCIsImV4cGlyeSI6MTczMzE4NDAwMCwiaXNTdWJkb21haW4iOnRydWV9" 

1123 ) 

1124 

1125 set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__ 

1126 

@classmethod
def supports_head(cls) -> bool:
    """Tell whether this request handler supports HEAD requests.

    That is the case exactly when the signature of `cls.get` declares a
    parameter named ``head`` and that parameter is keyword-only.
    """
    head_parameter = inspect.signature(cls.get).parameters.get("head")
    if head_parameter is None:
        # get() takes no "head" parameter at all
        return False
    return head_parameter.kind == inspect.Parameter.KEYWORD_ONLY

1136 

@cached_property
def user_settings(self) -> Options:
    """Return the `Options` object holding the user settings.

    The object is created from this handler on first access; being a
    cached property, later accesses reuse that same object.
    """
    options = Options(self)
    return options

1141 

@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:  # noqa: D102
    # Deliberately no docstring (hence the noqa): the file assigns the
    # parent method's docstring to this override after the definition.
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    # keep the Content-Type header in sync with self.content_type
    self.set_content_type_header()

    if isinstance(chunk, dict):
        # dicts are serialised by self.dump() before being written
        chunk = self.dump(chunk)

    today = self.now.date()
    if (today.month, today.day) == (4, 27):
        # Only on April 27th: textual output gets some of its words
        # replaced with "Stanley".

        def stanley_or_original(match: Any) -> str:
            # The generator is seeded with the word itself, so a given
            # word is treated the same everywhere; comparing against
            # year % 5 picks a different fifth of the words each year.
            word: str = match[0]
            if Random(word).randrange(5) == self.now.year % 5:
                return "Stanley"
            return word

        if isinstance(chunk, bytes):
            try:
                chunk = chunk.decode("UTF-8")
            except UnicodeDecodeError:
                pass  # not valid UTF-8: leave the bytes untouched
        if isinstance(chunk, str):
            # words made of one uppercase letter followed by at least
            # four lowercase letters
            chunk = regex.sub(
                r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b",
                stanley_or_original,
                chunk,
            )

    super().write(chunk)

1168 

1169 write.__doc__ = _RequestHandler.write.__doc__ 

1170 

1171 @override 

1172 def write_error(self, status_code: int, **kwargs: Any) -> None: 

1173 """Render the error page.""" 

1174 dict_content_types: tuple[str, str] = ( 

1175 "application/json", 

1176 "application/yaml", 

1177 ) 

1178 all_error_content_types: tuple[str, ...] = ( 

1179 # text/plain as first (default), to not screw up output in terminals 

1180 "text/plain", 

1181 "text/html", 

1182 "text/markdown", 

1183 *dict_content_types, 

1184 "application/vnd.asozial.dynload+json", 

1185 ) 

1186 

1187 if self.content_type not in all_error_content_types: 

1188 # don't send 406, instead default with text/plain 

1189 self.handle_accept_header(all_error_content_types, strict=False) 

1190 

1191 if self.content_type == "text/html": 

1192 self.render( # type: ignore[unused-awaitable] 

1193 "error.html", 

1194 status=status_code, 

1195 reason=self.get_error_message(**kwargs), 

1196 description=self.get_error_page_description(status_code), 

1197 is_traceback="exc_info" in kwargs 

1198 and not issubclass(kwargs["exc_info"][0], HTTPError) 

1199 and ( 

1200 self.settings.get("serve_traceback") 

1201 or self.is_authorized(Permission.TRACEBACK) 

1202 ), 

1203 ) 

1204 return 

1205 

1206 if self.content_type in dict_content_types: 

1207 self.finish( # type: ignore[unused-awaitable] 

1208 { 

1209 "status": status_code, 

1210 "reason": self.get_error_message(**kwargs), 

1211 } 

1212 ) 

1213 return 

1214 

1215 self.finish( # type: ignore[unused-awaitable] 

1216 f"{status_code} {self.get_error_message(**kwargs)}\n" 

1217 ) 

1218 

1219 write_error.__doc__ = _RequestHandler.write_error.__doc__