Coverage for an_website/utils/base_request_handler.py: 77.869%

488 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-01 14:47 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13# pylint: disable=too-many-lines 

14 

15""" 

16The base request handler used by other modules. 

17 

18This should only contain the BaseRequestHandler class. 

19""" 

20 

21from __future__ import annotations 

22 

23import contextlib 

24import inspect 

25import logging 

26import secrets 

27import sys 

28import traceback 

29import uuid 

30from asyncio import Future 

31from base64 import b64decode 

32from collections.abc import Awaitable, Callable, Coroutine 

33from contextvars import ContextVar 

34from datetime import date, datetime, timedelta, timezone, tzinfo 

35from functools import cached_property, partial, reduce 

36from random import Random, choice as random_choice 

37from types import TracebackType 

38from typing import Any, ClassVar, Final, cast, override 

39from urllib.parse import SplitResult, urlsplit, urlunsplit 

40from zoneinfo import ZoneInfo 

41 

42import elasticapm 

43import html2text 

44import orjson as json 

45import regex 

46import tornado.web 

47import yaml 

48from accept_types import get_best_match # type: ignore[import-untyped] 

49from ansi2html import Ansi2HTMLConverter 

50from bs4 import BeautifulSoup 

51from dateutil.easter import easter 

52from elastic_transport import ApiError, TransportError 

53from elasticsearch import AsyncElasticsearch 

54from openmoji_dist import VERSION as OPENMOJI_VERSION 

55from redis.asyncio import Redis 

56from tornado.httputil import HTTPServerRequest 

57from tornado.iostream import StreamClosedError 

58from tornado.web import ( 

59 Finish, 

60 GZipContentEncoding, 

61 HTTPError, 

62 MissingArgumentError, 

63 OutputTransform, 

64) 

65 

66from .. import ( 

67 EVENT_ELASTICSEARCH, 

68 EVENT_REDIS, 

69 GH_ORG_URL, 

70 GH_PAGES_URL, 

71 GH_REPO_URL, 

72 NAME, 

73 ORJSON_OPTIONS, 

74 pytest_is_running, 

75) 

76from .decorators import is_authorized 

77from .options import ColourScheme, Options 

78from .static_file_handling import FILE_HASHES_DICT, fix_static_path 

79from .themes import THEMES 

80from .utils import ( 

81 ModuleInfo, 

82 Permission, 

83 add_args_to_url, 

84 ansi_replace, 

85 apply, 

86 backspace_replace, 

87 bool_to_str, 

88 emoji2html, 

89 geoip, 

90 hash_bytes, 

91 is_prime, 

92 ratelimit, 

93 str_to_bool, 

94) 

95 

# Logger shared by everything in this module.
LOGGER: Final = logging.getLogger(__name__)

# Media types outside of "text/*" whose payload is text nevertheless.
TEXT_CONTENT_TYPES: Final[set[str]] = {
    "application/javascript",
    "application/json",
    "application/vnd.asozial.dynload+json",
    "application/x-ndjson",
    "application/xml",
    "application/yaml",
}

# Context variable that request handlers fill with the request they execute.
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")

108 

109 

class _RequestHandler(tornado.web.RequestHandler):
    """Thin layer over tornado's RequestHandler shared by all handlers."""

    @override
    async def _execute(
        self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
    ) -> None:
        # publish the request through the context variable before running
        request_ctx_var.set(self.request)
        parent_execute = super()._execute
        return await parent_execute(transforms, *args, **kwargs)

    # pylint: disable-next=protected-access
    _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__

    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        # streamed request bodies are not used; chunks are ignored
        return None

    data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__

    @override
    def log_exception(
        self,
        typ: None | type[BaseException],
        value: None | BaseException,
        tb: None | TracebackType,
    ) -> None:
        if isinstance(value, HTTPError):
            # HTTP errors keep tornado's own logging
            super().log_exception(typ, value, tb)
            return

        if typ is StreamClosedError:
            # a closed stream is only logged at debug level
            LOGGER.debug(
                "Stream closed %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )
            return

        LOGGER.error(
            "Uncaught exception %s",
            self._request_summary(),
            exc_info=(typ, value, tb),  # type: ignore[arg-type]
        )

    log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__

154 

155 

156class BaseRequestHandler(_RequestHandler): 

157 """The base request handler used by every page and API.""" 

158 

159 # pylint: disable=too-many-instance-attributes, too-many-public-methods 

160 

# path of the Elastic RUM bundle; the minified build unless in dev mode
ELASTIC_RUM_URL: ClassVar[str] = (
    "/@elastic/apm-rum@5.12.0/dist/bundles/elastic-apm-rum"
    + f".umd{'' if sys.flags.dev_mode else '.min'}.js"
)

# switches that subclasses can override
COMPUTE_ETAG: ClassVar[bool] = True  # False makes compute_etag return None
ALLOW_COMPRESSION: ClassVar[bool] = True  # False disables gzip in prepare
MAX_BODY_SIZE: ClassVar[None | int] = None  # larger bodies get a 413
ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()  # for Accept handling

module_info: ModuleInfo
# info about page, can be overridden in module_info
title: str = "Das Asoziale Netzwerk"
short_title: str = "Asoziales Netzwerk"
description: str = "Die tolle Webseite des Asozialen Netzwerks"

# set to True by render(); finish() only converts rendered output
used_render: bool = False

active_origin_trials: set[str]  # features already enabled by origin_trial
content_type: None | str = None  # chosen by handle_accept_header
apm_script: None | str
crawler: bool = False  # set in prepare from the CRAWLER_SECRET setting
nonce: str  # CSP nonce, generated in set_csp_header

185 

def _finish(
    self, chunk: None | str | bytes | dict[str, Any] = None
) -> Future[None]:
    """
    Write the last chunk and finish the response.

    Textual responses that do not end with a newline get one appended.
    Raises RuntimeError if the response was already finished.
    """
    if self._finished:
        raise RuntimeError("finish() called twice")

    if chunk is not None:
        self.write(chunk)

    content_type = self.content_type
    if content_type:
        is_textual = (
            content_type in TEXT_CONTENT_TYPES
            or content_type.startswith("text/")
            or content_type.endswith(("+xml", "+json"))
        )
        if is_textual:
            buffered = self._write_buffer
            # only add the newline if something was written without one
            if buffered and not buffered[-1].endswith(b"\n"):
                self.write(b"\n")

    return super().finish()

208 

@property
def apm_client(self) -> None | elasticapm.Client:
    """Return the APM client stored in the settings, or None."""
    apm_settings = self.settings.get("ELASTIC_APM", {})
    return apm_settings.get("CLIENT")  # type: ignore[no-any-return]

213 

@property
def apm_enabled(self) -> bool:
    """Tell whether the settings mark Elastic APM as enabled."""
    apm_settings = self.settings.get("ELASTIC_APM", {})
    enabled = apm_settings.get("ENABLED")
    return bool(enabled)

218 

219 @override 

220 def compute_etag(self) -> None | str: 

221 """Compute ETag with Base85 encoding.""" 

222 if not self.COMPUTE_ETAG: 

223 return None 

224 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907 

225 

226 @override 

227 def data_received( # noqa: D102 

228 self, chunk: bytes 

229 ) -> None | Awaitable[None]: 

230 pass 

231 

232 @override 

233 def decode_argument( # noqa: D102 

234 self, value: bytes, name: str | None = None 

235 ) -> str: 

236 try: 

237 return value.decode("UTF-8", "replace") 

238 except UnicodeDecodeError as exc: 

239 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}" 

240 LOGGER.exception(err_msg, exc_info=exc) 

241 raise HTTPError(400, err_msg) from exc 

242 

@property
def dump(self) -> Callable[[Any], str | bytes]:
    """
    Return the serializer matching `self.content_type`.

    JSON types are dumped with orjson (indented if the "pretty" argument
    is true), YAML is dumped with yaml.dump using the "yaml_width"
    argument (at least 80). On the first of April YAML is dumped as
    JSON. Every other content type gets an identity function.
    """
    content_type = self.content_type
    as_json = content_type in {
        "application/json",
        "application/vnd.asozial.dynload+json",
    }

    if content_type == "application/yaml":
        day_and_month = self.now.timetuple()[2:0:-1]
        if day_and_month != (1, 4):
            return lambda spam: yaml.dump(
                spam,
                width=self.get_int_argument("yaml_width", 80, min_=80),
            )
        # 1st of April: JSON is valid YAML, too
        as_json = True

    if not as_json:
        return lambda spam: spam

    option = ORJSON_OPTIONS
    if self.get_bool_argument("pretty", False):
        option |= json.OPT_INDENT_2
    return lambda spam: json.dumps(spam, option=option)

267 

@property
def elasticsearch(self) -> AsyncElasticsearch:
    """
    Return the Elasticsearch client stored in the settings.

    The value is None if the settings contain no client.
    """
    client = self.settings.get("ELASTICSEARCH")
    return cast(AsyncElasticsearch, client)

276 

@property
def elasticsearch_prefix(self) -> str:
    """Return the Elasticsearch prefix from the settings (default: NAME)."""
    prefix = self.settings.get("ELASTICSEARCH_PREFIX", NAME)
    return prefix  # type: ignore[no-any-return]

283 

@override
def finish(  # noqa: D102
    self, chunk: None | str | bytes | dict[Any, Any] = None
) -> Future[None]:
    # Rendered HTML gets converted if another content type was chosen:
    #   text/markdown  -> html2text output below a "# title" heading
    #   text/plain     -> the text of the document
    #   dynload JSON   -> dict with url, titles, body, scripts and styles
    # Everything else is passed to _finish unchanged.
    as_json = self.content_type == "application/vnd.asozial.dynload+json"
    as_plain_text = self.content_type == "text/plain"
    as_markdown = self.content_type == "text/markdown"

    if (
        not isinstance(chunk, bytes | str)
        or self.content_type == "text/html"
        or not self.used_render
        or not (as_json or as_plain_text or as_markdown)
    ):
        return self._finish(chunk)

    chunk = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk

    if as_markdown:
        return self._finish(
            f"# {self.title}\n\n"
            + html2text.html2text(chunk, self.request.full_url()).strip()
        )

    soup = BeautifulSoup(chunk, features="lxml")

    if as_plain_text:
        return self._finish(soup.get_text("\n", True))

    dictionary: dict[str, object] = {
        "url": self.fix_url(),
        "title": self.title,
        "short_title": (
            self.short_title if self.title != self.short_title else None
        ),
        # assumes the rendered page contains a <main> element
        "body": "".join(
            str(element)
            for element in soup.find_all(name="main")[0].contents
        ).strip(),
        "scripts": [
            {"script": script.string} | script.attrs
            for script in soup.find_all("script")
        ],
        "stylesheets": [
            stylesheet.get("href").strip()
            for stylesheet in soup.find_all("link", rel="stylesheet")
        ],
        # .string is None for tags without a single string child
        # (e.g. an empty <style>); str.join would fail on those
        "css": "\n".join(
            style.string
            for style in soup.find_all("style")
            if style.string is not None
        ),
    }

    return self._finish(dictionary)

finish.__doc__ = _RequestHandler.finish.__doc__

337 

def finish_dict(self, **kwargs: Any) -> Future[None]:
    """Finish the request with the keyword arguments as dictionary."""
    payload: dict[str, Any] = kwargs
    return self.finish(payload)

341 

def fix_url(
    self,
    url: None | str | SplitResult = None,
    new_path: None | str = None,
    **query_args: None | str | bool | float,
) -> str:
    """
    Fix a URL and return it.

    Without a URL the URL of the current request is used.

    A URL pointing to another host is returned unchanged if the user
    does not want to be asked before leaving or if the redirect module
    is not loaded; otherwise a link to "/redirect" with the URL in the
    "to" argument is created.

    For URLs of this host the path (or `new_path`) is normalized and the
    user settings are added as query arguments. Settings equal to the
    stored value are set to None, as are all option names for soundboard
    files and paths in FILE_HASHES_DICT (presumably add_args_to_url
    drops None values; confirm in its implementation).
    """
    if url is None:
        url = self.request.full_url()
    split_url = urlsplit(url) if isinstance(url, str) else url

    is_foreign = bool(
        split_url.netloc
        and split_url.netloc.lower() != self.request.host.lower()
    )
    if is_foreign:
        if not (
            self.user_settings.ask_before_leaving
            and self.settings.get("REDIRECT_MODULE_LOADED")
        ):
            return split_url.geturl()
        path = "/redirect"
        query_args["to"] = split_url.geturl()
        # query and fragment are taken from the current request now
        split_url = urlsplit(self.request.full_url())
    elif new_path is None:
        path = split_url.path
    else:
        path = new_path

    path = f"/{path.strip('/')}".lower()
    if path == "/lolwut":
        path = path.upper()

    if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
        query_args.update(
            dict.fromkeys(self.user_settings.iter_option_names())
        )
    else:
        current_values = self.user_settings.as_dict_with_str_values()
        for key, value in current_values.items():
            query_args.setdefault(key, value)

        stored_values = self.user_settings.as_dict_with_str_values(
            include_query_argument=False,
            include_body_argument=self.request.path == "/einstellungen"
            and self.get_bool_argument("save_in_cookie", False),
        )
        for key, value in stored_values.items():
            if value == query_args[key]:
                query_args[key] = None

    base_url = urlunsplit(
        (
            self.request.protocol,
            self.request.host,
            "" if path == "/" else path,
            split_url.query,
            split_url.fragment,
        )
    )
    return add_args_to_url(base_url, **query_args)

402 

def geoip(
    self,
    ip: None | str = None,
    database: str = geoip.__defaults__[0],  # type: ignore[index]
    *,
    allow_fallback: bool = True,
) -> Coroutine[None, None, None | dict[str, Any]]:
    """
    Get GeoIP information.

    Without an IP the remote IP of the request is used. Elasticsearch
    is only passed to the lookup if EVENT_ELASTICSEARCH is set.
    """
    address = ip if ip else self.request.remote_ip
    if EVENT_ELASTICSEARCH.is_set():
        return geoip(
            address,
            database,
            self.elasticsearch,
            allow_fallback=allow_fallback,
        )
    return geoip(address, database)

418 

@classmethod
def get_allowed_methods(cls) -> list[str]:
    """
    Return the sorted list of allowed methods.

    OPTIONS is always included; HEAD is included if GET is allowed and
    the handler supports HEAD.
    """
    allowed = set(cls.ALLOWED_METHODS)
    allowed.add("OPTIONS")
    if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
        allowed.add("HEAD")
    return sorted(allowed)

426 

def get_bool_argument(
    self,
    name: str,
    default: None | bool = None,
) -> bool:
    """
    Get an argument parsed as boolean.

    With a default, the default is passed to str_to_bool together with
    the argument (an empty string if the argument is missing). Without
    a default the argument is required and a value that cannot be
    parsed raises HTTPError 400.
    """
    if default is None:
        value = str(self.get_argument(name))
        try:
            return str_to_bool(value)
        except ValueError as err:
            raise HTTPError(400, f"{value} is not a boolean") from err

    return str_to_bool(self.get_argument(name, ""), default)

440 

def get_display_scheme(self) -> ColourScheme:
    """
    Get the scheme currently displayed.

    "random" is resolved to "dark" for an odd microsecond of `self.now`
    and to "light" for an even one.
    """
    scheme = self.user_settings.scheme
    if scheme != "random":
        return scheme
    return "dark" if self.now.microsecond & 1 else "light"

447 

def get_display_theme(self) -> str:
    """
    Get the theme currently displayed.

    In December the default theme is replaced by "christmas". "random"
    picks one of THEMES except "random" and "christmas".
    """
    selected = self.user_settings.theme

    if selected == "default" and self.now.month == 12:
        return "christmas"

    if selected == "random":
        excluded = ("random", "christmas")
        candidates = tuple(name for name in THEMES if name not in excluded)
        return random_choice(candidates)  # nosec: B311

    return selected

463 

def get_error_message(self, **kwargs: Any) -> str:
    """
    Get the error message and return it.

    For exceptions that are no HTTPError the formatted exception is
    returned; the whole traceback is only included if the
    serve_traceback setting is true or the request has the TRACEBACK
    permission. A MissingArgumentError yields its log message and
    everything else the reason of the response.
    """
    if "exc_info" not in kwargs:
        return str(self._reason)

    exc_info = kwargs["exc_info"]
    exc_type = exc_info[0]

    if not issubclass(exc_type, HTTPError):
        if self.settings.get("serve_traceback") or self.is_authorized(
            Permission.TRACEBACK
        ):
            lines = traceback.format_exception(*exc_info)
        else:
            lines = traceback.format_exception_only(*exc_info[:2])
        return "".join(lines).strip()

    if issubclass(exc_type, MissingArgumentError):
        return cast(str, exc_info[1].log_message)

    return str(self._reason)

488 

def get_error_page_description(self, status_code: int) -> str:
    """
    Get the description for the error page.

    Raises ValueError for status codes outside of 100 to 599.
    """
    # https://developer.mozilla.org/docs/Web/HTTP/Status
    # status codes with a description of their own
    if status_code == 404:
        return f"{self.request.path} wurde nicht gefunden."
    if status_code == 451:
        return "Hier wäre bestimmt geiler Scheiß."

    # one generic description per class of status codes
    descriptions = (
        (100, "Hier gibt es eine total wichtige Information."),
        (200, "Hier ist alles super! 🎶🎶"),
        (300, "Eine Umleitung ist eingerichtet."),
        (400, "Ein Client-Fehler ist aufgetreten."),
        (500, "Ein Server-Fehler ist aufgetreten."),
    )
    for first_code, text in descriptions:
        if first_code <= status_code <= first_code + 99:
            return text

    raise ValueError(
        f"{status_code} is not a valid HTTP response status code."
    )

510 

def get_int_argument(
    self,
    name: str,
    default: None | int = None,
    *,
    max_: None | int = None,
    min_: None | int = None,
) -> int:
    """
    Get an argument parsed as integer.

    The value is parsed with base 0, so prefixes like "0x" are accepted.
    Without a default the argument is required and an invalid value
    raises HTTPError 400; with a default, missing, empty and invalid
    values yield the default. The result is limited to `max_` first and
    to `min_` afterwards.
    """
    if default is None:
        raw_value = self.get_argument(name)
        try:
            value = int(raw_value, base=0)
        except ValueError as err:
            raise HTTPError(400, f"{raw_value} is not an integer") from err
    else:
        value = default
        if self.get_argument(name, ""):
            with contextlib.suppress(ValueError):
                value = int(self.get_argument(name), base=0)

    if max_ is not None and value > max_:
        value = max_
    if min_ is not None and value < min_:
        value = min_

    return value

540 

def get_module_infos(self) -> tuple[ModuleInfo, ...]:
    """Return the module infos from the settings (empty if unset)."""
    module_infos = self.settings.get("MODULE_INFOS")
    if module_infos:
        return module_infos
    return ()

544 

def get_reporting_api_endpoint(self) -> None | str:
    """
    Get the endpoint for the Reporting API™️.

    Returns None if reporting is disabled. An endpoint starting with
    "/" is prefixed with protocol and host of the request; any other
    value is returned as it is.
    """
    settings = self.settings
    if not settings.get("REPORTING"):
        return None

    endpoint = settings.get("REPORTING_ENDPOINT")
    if endpoint and endpoint.startswith("/"):
        return f"{self.request.protocol}://{self.request.host}{endpoint}"
    return endpoint

555 

@override
def get_template_namespace(self) -> dict[str, Any]:
    """
    Return the template namespace with everything templates use.

    The namespace of the super class is extended by the user settings
    and by page info, helper functions, URLs and a few flags.
    """
    namespace = super().get_template_namespace()
    namespace.update(self.user_settings.as_dict())

    # converts ANSI to HTML and applies the two replace functions
    convert_ansi = partial(
        Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
    )
    ansi_pipeline = partial(
        reduce, apply, (convert_ansi, ansi_replace, backspace_replace)
    )

    # how emojis get rendered depends on the openmoji setting
    def wrap_in_span(emoji: str) -> str:
        return f'<span class="openmoji">{emoji}</span>'

    def keep_emoji(emoji: str) -> str:
        return emoji

    if self.user_settings.openmoji == "img":
        render_emoji = emoji2html
    elif self.user_settings.openmoji:
        render_emoji = wrap_in_span
    else:
        render_emoji = keep_emoji

    # the canonical URL has no query and is lower case (except LOLWUT)
    if self.request.path.upper().startswith("/LOLWUT"):
        cased_url = self.request.full_url().upper()
    else:
        cased_url = self.request.full_url().lower()
    canonical_url = self.fix_url(cased_url).split("?")[0]

    keywords = "Asoziales Netzwerk, Känguru-Chroniken"
    if self.module_info:  # type: ignore[truthy-bool]
        keywords += (
            f", {self.module_info.get_keywords_as_str(self.request.path)}"
        )

    today = self.now.date()
    since_easter = today - easter(self.now.year)

    namespace.update(
        {
            "ansi2html": ansi_pipeline,
            "apm_script": (
                self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
                if self.apm_enabled
                else None
            ),
            "as_html": self.content_type == "text/html",
            # true on the 1st of April or if the cookie "c" is true
            "c": today == date(self.now.year, 4, 1)
            or str_to_bool(self.get_cookie("c", "f") or "f", False),
            "canonical_url": canonical_url,
            "description": self.description,
            "display_theme": self.get_display_theme(),
            "display_scheme": self.get_display_scheme(),
            "elastic_rum_url": self.ELASTIC_RUM_URL,
            "fix_static": lambda path: self.fix_url(fix_static_path(path)),
            "fix_url": self.fix_url,
            "emoji2html": render_emoji,
            "form_appendix": self.user_settings.get_form_appendix(),
            "GH_ORG_URL": GH_ORG_URL,
            "GH_PAGES_URL": GH_PAGES_URL,
            "GH_REPO_URL": GH_REPO_URL,
            "keywords": keywords,
            "lang": "de",  # TODO: add language support
            "nonce": self.nonce,
            "now": self.now,
            "openmoji_version": OPENMOJI_VERSION,
            "settings": self.settings,
            "short_title": self.short_title,
            "testing": pytest_is_running(),
            "title": self.title,
            # true on the day returned by easter() and the day after
            "🥚": timedelta() <= since_easter < timedelta(days=2),
            "🦘": is_prime(self.now.microsecond),
        }
    )
    return namespace

629 

async def get_time(self) -> datetime:
    """
    Get the start time of the request in the users' timezone.

    The timezone is taken from the GeoIP information of the request;
    UTC is used if there is none or if the lookup fails.
    """
    tz: tzinfo = timezone.utc
    try:
        # pylint: disable-next=redefined-outer-name
        geoip = await self.geoip()
    except (ApiError, TransportError):
        LOGGER.exception("Elasticsearch request failed")
        apm_client = self.apm_client
        if apm_client:
            apm_client.capture_exception()  # type: ignore[no-untyped-call]
        geoip = None

    if geoip and "timezone" in geoip:
        tz = ZoneInfo(geoip["timezone"])

    start_time = self.request._start_time  # pylint: disable=protected-access
    return datetime.fromtimestamp(start_time, tz=tz)

645 

def get_user_id(self) -> str:
    """
    Get the user id saved in the cookie or create one.

    Cookies up to 90 days old are accepted. The cookie is written again
    if there is none that is at most 30 days old.
    """
    stored = self.get_secure_cookie(
        "user_id",
        max_age_days=90,
        min_version=2,
    )
    if stored:
        user_id = stored.decode("UTF-8")
    else:
        user_id = str(uuid.uuid4())

    # save it in cookie or reset expiry date
    is_fresh = self.get_secure_cookie(
        "user_id", max_age_days=30, min_version=2
    )
    if not is_fresh:
        self.set_secure_cookie(
            "user_id",
            user_id,
            expires_days=90,
            path="/",
            samesite="Strict",
        )

    return user_id

668 

def handle_accept_header(  # pylint: disable=inconsistent-return-statements
    self, possible_content_types: tuple[str, ...], strict: bool = True
) -> None:
    """
    Handle the Accept header and set `self.content_type`.

    Nothing happens without possible content types. If none of them
    matches the header, the request is answered with 406 in strict mode
    and the first possible type is used otherwise.
    """
    if not possible_content_types:
        return

    accepted = self.request.headers.get("Accept") or "*/*"
    best_match = get_best_match(accepted, possible_content_types)

    if best_match is None and strict:
        return self.handle_not_acceptable(possible_content_types)

    self.content_type = (
        possible_content_types[0] if best_match is None else best_match
    )
    self.set_content_type_header()

685 

def handle_not_acceptable(
    self, possible_content_types: tuple[str, ...]
) -> None:
    """
    Answer with 406 and the list of possible content types.

    Only call this if we cannot respect the Accept header. The response
    is sent by raising Finish with one content type per line.
    """
    self.clear_header("Content-Type")
    self.set_status(406)
    listing = "\n".join(possible_content_types)
    raise Finish(f"{listing}\n")

693 

def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
    """
    Handle HEAD requests by calling `get` with `head=True`.

    Raises HTTPError 405 if `get` is the one from tornado.web and
    HTTPError 501 if the handler does not support HEAD.
    """
    get_is_default = self.get.__module__ == "tornado.web"
    if get_is_default:
        raise HTTPError(405)
    if not self.supports_head():
        raise HTTPError(501)

    return self.get(*args, **{**kwargs, "head": True})

703 

704 @override 

705 def initialize( 

706 self, 

707 *, 

708 module_info: ModuleInfo, 

709 # default is true, because then empty args dicts are 

710 # enough to specify that the defaults should be used 

711 default_title: bool = True, 

712 default_description: bool = True, 

713 ) -> None: 

714 """ 

715 Get title and description from the kwargs. 

716 

717 If title and description are present in the kwargs, 

718 then they override self.title and self.description. 

719 """ 

720 self.module_info = module_info 

721 if not default_title: 

722 page_info = self.module_info.get_page_info(self.request.path) 

723 self.title = page_info.name 

724 self.short_title = page_info.short_name or self.title 

725 if not default_description: 

726 self.description = self.module_info.get_page_info( 

727 self.request.path 

728 ).description 

729 

def is_authorized(
    self, permission: Permission, allow_cookie_auth: bool = True
) -> bool | None:
    """Return the result of the is_authorized decorator helper for this request."""
    authorized = is_authorized(self, permission, allow_cookie_auth)
    return authorized

735 

@cached_property
def now(self) -> datetime:
    """
    Get the start time of the request in UTC as fallback.

    This only runs if `now` is read before it was assigned. That is
    an AssertionError while pytest is running and gets logged as error
    for requests with a supported method.
    """
    # pylint: disable=method-hidden
    if pytest_is_running():
        raise AssertionError("Now accessed before it was set")
    if self.request.method in self.SUPPORTED_METHODS:
        LOGGER.error("Now accessed before it was set", stacklevel=3)
    start_time = self.request._start_time  # pylint: disable=protected-access
    return datetime.fromtimestamp(start_time, tz=timezone.utc)

748 

749 @override 

750 async def options(self, *args: Any, **kwargs: Any) -> None: 

751 """Handle OPTIONS requests.""" 

752 # pylint: disable=unused-argument 

753 self.set_header("Allow", ", ".join(self.get_allowed_methods())) 

754 self.set_status(204) 

755 await self.finish() 

756 

def origin_trial(self, token: bytes | str) -> bool:
    """
    Enable an experimental feature.

    The payload is the JSON following the first 69 bytes of the decoded
    token. The token is sent in the Origin-Trial header if it is not
    expired and if scheme and host of its origin match the request (or
    the request is on a subdomain and the payload allows subdomains).
    Returns whether the feature is active.
    """
    # pylint: disable=protected-access
    payload = json.loads(b64decode(token)[69:])
    feature = payload["feature"]
    if feature in self.active_origin_trials:
        return True

    origin = urlsplit(payload["origin"])
    url = urlsplit(self.request.full_url())
    if url.port is None and url.scheme in {"http", "https"}:
        # make the default port explicit for the comparison
        default_port = 443 if url.scheme == "https" else 80
        url = url._replace(netloc=f"{url.hostname}:{default_port}")

    if (
        self.request._start_time > payload["expiry"]
        or url.scheme != origin.scheme
        or not (
            url.netloc == origin.netloc
            or (
                payload.get("isSubdomain")
                and url.netloc.endswith(f".{origin.netloc}")
            )
        )
    ):
        return False

    self.add_header("Origin-Trial", token)
    self.active_origin_trials.add(feature)
    return True

781 

@override
async def prepare(self) -> None:
    """
    Prepare the request before the method of the handler is called.

    This sets `self.now`, disables gzip if compression is not allowed,
    detects the crawler, handles the Accept header, redirects GET and
    HEAD requests to the canonical domain, limits the size of the body
    and applies the global and the handler ratelimit.
    """
    # pylint: disable=invalid-overridden-method
    self.now = await self.get_time()

    if not self.ALLOW_COMPRESSION:
        gzip_transforms = (
            transform
            for transform in self._transforms
            if isinstance(transform, GZipContentEncoding)
        )
        for transform in gzip_transforms:
            # pylint: disable-next=protected-access
            transform._gzipping = False

    crawler_secret = self.settings.get("CRAWLER_SECRET")
    if crawler_secret:
        user_agent = self.request.headers.get("User-Agent", "")
        self.crawler = crawler_secret in user_agent

    self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)

    if (
        self.request.method in {"GET", "HEAD"}
        and self.redirect_to_canonical_domain()
    ):
        return

    if self.request.method == "GET":
        # seeded with the time of the request; rarely sets the cookie "c"
        days = Random(self.now.timestamp()).randint(0, 31337)
        if days in {69, 420, 1337, 31337}:
            self.set_cookie("c", "s", expires_days=days / 24, path="/")

    if self.request.method == "OPTIONS":
        return

    if self.MAX_BODY_SIZE is not None:
        body_size = len(self.request.body)
        if body_size > self.MAX_BODY_SIZE:
            LOGGER.warning(
                "%s > MAX_BODY_SIZE (%s)",
                body_size,
                self.MAX_BODY_SIZE,
            )
            raise HTTPError(413)

    # the handler ratelimit is only checked if the global one passed
    if not await self.ratelimit(True):
        await self.ratelimit()

831 

async def ratelimit(self, global_ratelimit: bool = False) -> bool:
    """
    Take b1nzy to space using Redis.

    Returns whether the request got ratelimited; in that case the error
    page (429, or 420 on the 20th of April) is already written.
    Nothing is limited if ratelimits are disabled, for OPTIONS requests,
    for requests with the RATELIMITS permission and for the crawler.
    Raises HTTPError 503 if Redis is not available.

    The global ratelimit uses fixed values, the other one reads the
    RATELIMIT_{method}_* attributes of the handler and is skipped if
    there is no limit for the method.
    """
    is_exempt = (
        not self.settings.get("RATELIMITS")
        or self.request.method == "OPTIONS"
        or self.is_authorized(Permission.RATELIMITS)
        or self.crawler
    )
    if is_exempt:
        return False

    if not EVENT_REDIS.is_set():
        LOGGER.warning(
            (
                "Ratelimits are enabled, but Redis is not available. "
                "This can happen shortly after starting the website."
            ),
        )
        raise HTTPError(503)

    limit_options: dict[str, Any]
    if global_ratelimit:
        limit_options = {
            "bucket": None,
            "max_burst": 99,  # limit = 100
            "count_per_period": 20,  # 20 requests per second
            "period": 1,
            "tokens": 10 if self.settings.get("UNDER_ATTACK") else 1,
        }
    else:
        # HEAD shares the configuration of GET
        is_head = self.request.method == "HEAD"
        method = "GET" if is_head else self.request.method
        limit = getattr(self, f"RATELIMIT_{method}_LIMIT", 0)
        if not limit:
            return False
        limit_options = {
            "bucket": getattr(
                self,
                f"RATELIMIT_{method}_BUCKET",
                self.__class__.__name__.lower(),
            ),
            "max_burst": limit - 1,
            # request count per period
            "count_per_period": getattr(
                self, f"RATELIMIT_{method}_COUNT_PER_PERIOD", 30
            ),
            # period in seconds
            "period": getattr(self, f"RATELIMIT_{method}_PERIOD", 60),
            "tokens": 0 if is_head else 1,
        }

    ratelimited, headers = await ratelimit(
        self.redis,
        self.redis_prefix,
        str(self.request.remote_ip),
        **limit_options,
    )

    for header, value in headers.items():
        self.set_header(header, value)

    if ratelimited:
        on_april_20 = self.now.date() == date(self.now.year, 4, 20)
        status_code = 420 if on_april_20 else 429
        self.set_status(status_code)
        self.write_error(status_code)

    return ratelimited

901 

def redirect_to_canonical_domain(self) -> bool:
    """
    Redirect to the canonical domain and return whether that was done.

    There is no redirect without DOMAIN setting or Host header, if the
    host is the domain already, for .onion and .i2p hosts and for paths
    made of characters in the range U+2800 to U+28FF. The port of the
    Host header is kept.
    """
    domain = self.settings.get("DOMAIN")
    if not domain or not self.request.headers.get("Host"):
        return False

    host_name = self.request.host_name
    if host_name == domain or self.request.host_name.endswith(
        (".onion", ".i2p")
    ):
        return False

    if regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path):
        return False

    port = urlsplit(f"//{self.request.headers['Host']}").port
    canonical_netloc = f"{domain}:{port}" if port else domain
    target = urlsplit(self.request.full_url())._replace(
        netloc=canonical_netloc
    )
    self.redirect(target.geturl(), permanent=True)
    return True

920 

@property
def redis(self) -> Redis[str]:
    """
    Return the Redis client stored in the settings.

    The value is None if the settings contain no client.
    """
    client = self.settings.get("REDIS")
    return cast("Redis[str]", client)

929 

@property
def redis_prefix(self) -> str:
    """Return the Redis prefix from the settings (default: NAME)."""
    prefix = self.settings.get("REDIS_PREFIX", NAME)
    return prefix  # type: ignore[no-any-return]

936 

@override
def render(  # noqa: D102
    self, template_name: str, **kwargs: Any
) -> Future[None]:
    # remember that a template got rendered; finish() depends on it
    self.used_render = True
    rendered = super().render(template_name, **kwargs)
    return rendered

render.__doc__ = _RequestHandler.render.__doc__

945 

def set_content_type_header(self) -> None:
    """
    Set the Content-Type header based on `self.content_type`.

    Text types get the charset UTF-8 appended. No header is set if the
    content type is None.
    """
    content_type = self.content_type
    if content_type is None:
        return

    if str(content_type).startswith("text/"):  # RFC 2616 (3.7.1)
        header_value = f"{content_type};charset=utf-8"
    else:
        header_value = content_type
    self.set_header("Content-Type", header_value)

954 

@override
def set_cookie(  # noqa: D102  # pylint: disable=too-many-arguments
    self,
    name: str,
    value: str | bytes,
    domain: None | str = None,
    expires: None | float | tuple[int, ...] | datetime = None,
    path: str = "/",
    expires_days: None | float = 400,  # changed
    *,
    secure: bool | None = None,
    httponly: bool = True,
    **kwargs: Any,
) -> None:
    # default for same site should be strict
    kwargs.setdefault("samesite", "Strict")

    # without explicit value cookies are secure on HTTPS only
    is_secure = self.request.protocol == "https" if secure is None else secure

    super().set_cookie(
        name,
        value,
        domain,
        expires,
        path,
        expires_days,
        secure=is_secure,
        httponly=httponly,
        **kwargs,
    )

set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__

988 

def set_csp_header(self) -> None:
    """
    Set the Content-Security-Policy header.

    A fresh nonce is stored in `self.nonce` and allowed for scripts.
    If APM is enabled, the hash of its inline script and its server
    URL are allowed, too. The report-uri directive is only added if
    reporting is enabled.
    """
    self.nonce = secrets.token_urlsafe(16)
    apm_settings = self.settings["ELASTIC_APM"] if self.apm_enabled else {}

    script_sources = ["'self'", f"'nonce-{self.nonce}'"]
    if "INLINE_SCRIPT_HASH" in apm_settings:
        script_sources += [
            f"'sha256-{apm_settings['INLINE_SCRIPT_HASH']}'",
            "'unsafe-inline'",  # for browsers that don't support hash
        ]

    connect_sources = ["'self'"]
    if "SERVER_URL" in apm_settings:
        rum_server_url = apm_settings.get("RUM_SERVER_URL")
        if rum_server_url:
            # the RUM agent needs to connect to rum_server_url
            connect_sources.append(rum_server_url)
        elif rum_server_url is None:
            # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
            connect_sources.append(apm_settings["SERVER_URL"])

    # fix for older browsers
    websocket_scheme = "wss" if self.request.protocol == "https" else "ws"
    connect_sources.append(f"{websocket_scheme}://{self.request.host}")

    directives = [
        "default-src 'self'",
        f"script-src {' '.join(script_sources)}",
        f"connect-src {' '.join(connect_sources)}",
        "style-src 'self' 'unsafe-inline'",
        "img-src 'self' https://img.zeit.de https://github.asozial.org",
        "frame-ancestors 'self'",
        "sandbox allow-downloads allow-same-origin allow-modals"
        " allow-popups-to-escape-sandbox allow-scripts allow-popups"
        " allow-top-navigation-by-user-activation allow-forms",
        "report-to default",
        "base-uri 'none'",
    ]
    if self.settings.get("REPORTING"):
        directives.append(f"report-uri {self.get_reporting_api_endpoint()}")

    # every directive is terminated by a semicolon
    self.set_header(
        "Content-Security-Policy",
        "".join(f"{directive};" for directive in directives),
    )

1041 

@override
def set_default_headers(self) -> None:
    """Set default headers.

    Sends the Content-Security-Policy header, the reporting headers
    (if the ``REPORTING`` setting is truthy), CORS and cross-origin
    policy headers, optional HSTS / Onion-Location headers and, in
    debug mode, a few diagnostic headers.
    """
    self.set_csp_header()
    # reset the origin trials collected for this handler
    self.active_origin_trials = set()
    if self.settings.get("REPORTING"):
        endpoint = self.get_reporting_api_endpoint()
        self.set_header(
            "Reporting-Endpoints",
            f'default="{endpoint}"',  # noqa: B907
        )
        self.set_header(
            "Report-To",
            json.dumps(
                {
                    "group": "default",
                    "max_age": 2592000,  # 30 days in seconds
                    "endpoints": [{"url": endpoint}],
                },
                option=ORJSON_OPTIONS,
            ),
        )
        # NOTE(review): placing NEL inside the REPORTING check was
        # reconstructed from a listing without indentation; confirm.
        self.set_header("NEL", '{"report_to":"default","max_age":2592000}')
    self.set_header("X-Content-Type-Options", "nosniff")
    # CORS: any origin and any request header, limited to the methods
    # returned by get_allowed_methods()
    self.set_header("Access-Control-Max-Age", "7200")  # 2 hours in seconds
    self.set_header("Access-Control-Allow-Origin", "*")
    self.set_header("Access-Control-Allow-Headers", "*")
    self.set_header(
        "Access-Control-Allow-Methods",
        ", ".join(self.get_allowed_methods()),
    )
    self.set_header("Cross-Origin-Resource-Policy", "cross-origin")
    # every feature listed here gets an empty allowlist
    self.set_header(
        "Permissions-Policy",
        "browsing-topics=(),"
        "identity-credentials-get=(),"
        "join-ad-interest-group=(),"
        "private-state-token-issuance=(),"
        "private-state-token-redemption=(),"
        "run-ad-auction=()",
    )
    self.set_header("Referrer-Policy", "same-origin")
    self.set_header(
        "Cross-Origin-Opener-Policy", "same-origin; report-to=default"
    )
    # one hard-coded path gets the "credentialless" embedder policy,
    # everything else gets "require-corp"
    if self.request.path == "/kaenguru-comics-alt":  # TODO: improve this
        self.set_header(
            "Cross-Origin-Embedder-Policy",
            "credentialless; report-to=default",
        )
    else:
        self.set_header(
            "Cross-Origin-Embedder-Policy",
            "require-corp; report-to=default",
        )
    if self.settings.get("HSTS"):
        # 63072000 seconds are 730 days
        self.set_header("Strict-Transport-Security", "max-age=63072000")
    # advertise the onion address (same path and query string) unless
    # the request already targets a .onion host
    if (
        onion_address := self.settings.get("ONION_ADDRESS")
    ) and not self.request.host_name.endswith(".onion"):
        self.set_header(
            "Onion-Location",
            onion_address
            + self.request.path
            + (f"?{self.request.query}" if self.request.query else ""),
        )
    if self.settings.get("debug"):
        self.set_header("X-Debug", bool_to_str(True))
        # NOTE(review): placing this loop inside the debug check was
        # reconstructed from a listing without indentation; confirm.
        for permission in Permission:
            if permission.name:
                # one header per named permission, telling whether
                # is_authorized() grants it for this request
                self.set_header(
                    f"X-Permission-{permission.name}",
                    bool_to_str(bool(self.is_authorized(permission))),
                )
    self.set_header("Vary", "Accept, Authorization, Cookie")


# Expose the docstring of `_RequestHandler.set_default_headers` on this
# override; this replaces the docstring written in the method above.
set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__

1119 

@classmethod
def supports_head(cls) -> bool:
    """Tell whether the handler's `get` can serve HEAD requests.

    This is the case when `get` declares a keyword-only parameter
    named ``head``.
    """
    head_parameter = inspect.signature(cls.get).parameters.get("head")
    if head_parameter is None:
        return False
    return head_parameter.kind == inspect.Parameter.KEYWORD_ONLY

1129 

@cached_property
def user_settings(self) -> Options:
    """Return the `Options` object built from this handler.

    The value is created on first access and cached afterwards.
    """
    options = Options(self)
    return options

1134 

@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:  # noqa: D102
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    self.set_content_type_header()

    # Dicts are serialized by self.dump() before being written.
    if isinstance(chunk, dict):
        chunk = self.dump(chunk)

    today = self.now.date()
    if (today.month, today.day) == (4, 27):
        # On April 27th some words of the output become "Stanley".
        if isinstance(chunk, bytes):
            try:
                chunk = chunk.decode("UTF-8")
            except UnicodeDecodeError:
                pass  # not valid UTF-8, leave the bytes untouched

        if isinstance(chunk, str):
            year_slot = self.now.year % 5

            def stanleyfy(match: Any) -> str:
                # Seeding with the word itself makes the choice
                # deterministic for each word within a given year.
                word = match[0]
                if Random(word).randrange(5) == year_slot:
                    return "Stanley"
                return word

            # matches capitalized words of at least five letters
            chunk = regex.sub(r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b", stanleyfy, chunk)

    super().write(chunk)


# Expose the docstring of `_RequestHandler.write` on this override.
write.__doc__ = _RequestHandler.write.__doc__

1163 

@override
def write_error(self, status_code: int, **kwargs: Any) -> None:
    """Send the error response in the negotiated content type.

    HTML clients get the rendered ``error.html`` template, JSON and
    YAML clients get a dict with ``status`` and ``reason``, everything
    else gets a single plain text line.
    """
    dict_content_types: tuple[str, str] = (
        "application/json",
        "application/yaml",
    )
    # text/plain as first (default), to not screw up output in terminals
    all_error_content_types: tuple[str, ...] = (
        "text/plain",
        "text/html",
        "text/markdown",
    ) + dict_content_types + ("application/vnd.asozial.dynload+json",)

    if self.content_type not in all_error_content_types:
        # don't send 406, instead default with text/plain
        self.handle_accept_header(all_error_content_types, strict=False)

    if self.content_type == "text/html":
        reason = self.get_error_message(**kwargs)
        description = self.get_error_page_description(status_code)
        # Only show a traceback for non-HTTPError exceptions, and only
        # if tracebacks are served in general or this client may see them.
        show_traceback = (
            "exc_info" in kwargs
            and not issubclass(kwargs["exc_info"][0], HTTPError)
            and (
                self.settings.get("serve_traceback")
                or self.is_authorized(Permission.TRACEBACK)
            )
        )
        self.render(  # type: ignore[unused-awaitable]
            "error.html",
            status=status_code,
            reason=reason,
            description=description,
            is_traceback=show_traceback,
        )
    elif self.content_type in dict_content_types:
        error_info = {
            "status": status_code,
            "reason": self.get_error_message(**kwargs),
        }
        self.finish(error_info)  # type: ignore[unused-awaitable]
    else:
        error_line = f"{status_code} {self.get_error_message(**kwargs)}\n"
        self.finish(error_line)  # type: ignore[unused-awaitable]


# Expose the docstring of `_RequestHandler.write_error` on this override;
# this replaces the docstring written in the method above.
write_error.__doc__ = _RequestHandler.write_error.__doc__