Coverage for an_website/utils/base_request_handler.py: 78.528%

489 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-07-07 20:06 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13# pylint: disable=too-many-lines 

14 

15""" 

16The base request handler used by other modules. 

17 

18This should only contain the BaseRequestHandler class. 

19""" 

20 

21from __future__ import annotations 

22 

23import contextlib 

24import inspect 

25import logging 

26import secrets 

27import sys 

28import traceback 

29import uuid 

30from asyncio import Future 

31from base64 import b64decode 

32from collections.abc import Awaitable, Callable, Coroutine 

33from contextvars import ContextVar 

34from datetime import date, datetime, timedelta, timezone, tzinfo 

35from functools import cached_property, partial, reduce 

36from random import Random, choice as random_choice 

37from types import TracebackType 

38from typing import Any, ClassVar, Final, cast, override 

39from urllib.parse import SplitResult, urlsplit, urlunsplit 

40from zoneinfo import ZoneInfo 

41 

42import elasticapm 

43import html2text 

44import orjson as json 

45import regex 

46import tornado.web 

47import yaml 

48from accept_types import get_best_match # type: ignore[import-untyped] 

49from ansi2html import Ansi2HTMLConverter 

50from bs4 import BeautifulSoup 

51from dateutil.easter import easter 

52from elastic_transport import ApiError, TransportError 

53from elasticsearch import AsyncElasticsearch 

54from openmoji_dist import VERSION as OPENMOJI_VERSION 

55from redis.asyncio import Redis 

56from tornado.httputil import HTTPServerRequest 

57from tornado.iostream import StreamClosedError 

58from tornado.web import ( 

59 Finish, 

60 GZipContentEncoding, 

61 HTTPError, 

62 MissingArgumentError, 

63 OutputTransform, 

64) 

65 

66from .. import ( 

67 EVENT_ELASTICSEARCH, 

68 EVENT_REDIS, 

69 GH_ORG_URL, 

70 GH_PAGES_URL, 

71 GH_REPO_URL, 

72 NAME, 

73 ORJSON_OPTIONS, 

74 pytest_is_running, 

75) 

76from .decorators import is_authorized 

77from .options import ColourScheme, Options 

78from .static_file_handling import FILE_HASHES_DICT, fix_static_path 

79from .themes import THEMES 

80from .utils import ( 

81 ModuleInfo, 

82 Permission, 

83 add_args_to_url, 

84 ansi_replace, 

85 apply, 

86 backspace_replace, 

87 bool_to_str, 

88 emoji2html, 

89 geoip, 

90 hash_bytes, 

91 is_prime, 

92 ratelimit, 

93 str_to_bool, 

94) 

95 

# Logger used by everything in this module.
LOGGER: Final = logging.getLogger(__name__)

# Content types outside of "text/*" that BaseRequestHandler._finish also
# checks before appending a trailing newline to the response.
TEXT_CONTENT_TYPES: Final[set[str]] = {
    "application/javascript",
    "application/json",
    "application/vnd.asozial.dynload+json",
    "application/x-ndjson",
    "application/xml",
    "application/yaml",
}

# Holds the request being executed; it is set in _RequestHandler._execute.
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")

108 

109 

110class _RequestHandler(tornado.web.RequestHandler): 

111 """Base for Tornado request handlers.""" 

112 

113 crawler: bool = False 

114 

@override
async def _execute(
    self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
) -> None:
    # Store the request in the context variable before delegating to the
    # parent implementation.
    request_ctx_var.set(self.request)
    parent_execute = super()._execute
    return await parent_execute(transforms, *args, **kwargs)

121 

122 # pylint: disable-next=protected-access 

123 _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__ 

124 

@property
def apm_client(self) -> None | elasticapm.Client:
    """Return the ``CLIENT`` entry of the ``ELASTIC_APM`` setting, if any."""
    apm_settings = self.settings.get("ELASTIC_APM", {})
    return apm_settings.get("CLIENT")  # type: ignore[no-any-return]

129 

@property
def apm_enabled(self) -> bool:
    """Tell whether the ``ELASTIC_APM`` setting has a truthy ``ENABLED``."""
    apm_settings = self.settings.get("ELASTIC_APM", {})
    enabled = apm_settings.get("ENABLED")
    return bool(enabled)

134 

135 @override 

136 def data_received( # noqa: D102 

137 self, chunk: bytes 

138 ) -> None | Awaitable[None]: 

139 pass 

140 

141 data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__ 

142 

@property
def elasticsearch(self) -> AsyncElasticsearch:
    """
    Return the value of the ``ELASTICSEARCH`` setting.

    The value is only cast, not checked: it is None when the setting
    is absent (i.e. Elasticsearch is not enabled).
    """
    client = self.settings.get("ELASTICSEARCH")
    return cast(AsyncElasticsearch, client)

151 

@property
def elasticsearch_prefix(self) -> str:
    """Return the ``ELASTICSEARCH_PREFIX`` setting, defaulting to NAME."""
    prefix = self.settings.get("ELASTICSEARCH_PREFIX", NAME)
    return prefix  # type: ignore[no-any-return]

158 

def geoip(
    self,
    ip: None | str = None,
    database: str = geoip.__defaults__[0],  # type: ignore[index]
    *,
    allow_fallback: bool = True,
) -> Coroutine[None, None, None | dict[str, Any]]:
    """
    Return the coroutine that looks up GeoIP information.

    A falsy ``ip`` is replaced by the remote IP of the current request.
    The Elasticsearch client and ``allow_fallback`` are only passed on
    while EVENT_ELASTICSEARCH is set.
    """
    address = ip or self.request.remote_ip
    if EVENT_ELASTICSEARCH.is_set():
        return geoip(
            address,
            database,
            self.elasticsearch,
            allow_fallback=allow_fallback,
        )
    return geoip(address, database)

174 

async def get_time(self) -> datetime:
    """
    Return the start time of the request in the user's timezone.

    The timezone comes from the ``timezone`` key of the GeoIP lookup.
    UTC is used if that key is missing, the lookup returns nothing, or
    the lookup fails with an ApiError/TransportError (which is logged
    and, if an APM client is configured, reported to it).
    """
    zone: tzinfo = timezone.utc
    try:
        info = await self.geoip()
    except (ApiError, TransportError):
        LOGGER.exception("Elasticsearch request failed")
        client = self.apm_client
        if client:
            client.capture_exception()  # type: ignore[no-untyped-call]
    else:
        if info and "timezone" in info:
            zone = ZoneInfo(info["timezone"])
    started_at = self.request._start_time  # pylint: disable=protected-access
    return datetime.fromtimestamp(started_at, tz=zone)

190 

def is_authorized(
    self, permission: Permission, allow_cookie_auth: bool = True
) -> bool | None:
    """Delegate the permission check for this request to is_authorized."""
    verdict = is_authorized(self, permission, allow_cookie_auth)
    return verdict

196 

@override
def log_exception(
    self,
    typ: None | type[BaseException],
    value: None | BaseException,
    tb: None | TracebackType,
) -> None:
    # HTTP errors keep the logging of the parent implementation.
    if isinstance(value, HTTPError):
        super().log_exception(typ, value, tb)
        return

    exc_info = (typ, value, tb)
    if typ is StreamClosedError:
        # A closed stream is only worth a debug message.
        LOGGER.debug(
            "Stream closed %s",
            self._request_summary(),
            exc_info=exc_info,  # type: ignore[arg-type]
        )
        return

    LOGGER.error(
        "Uncaught exception %s",
        self._request_summary(),
        exc_info=exc_info,  # type: ignore[arg-type]
    )

218 

219 log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__ 

220 

@cached_property
def now(self) -> datetime:
    """
    Return the request start time in UTC as a fallback for ``now``.

    This only runs when ``now`` is read before a value was assigned to
    it. Under pytest that raises an AssertionError; otherwise an error
    is logged if the request method is one of SUPPORTED_METHODS.
    """
    # pylint: disable=method-hidden
    if pytest_is_running():
        raise AssertionError("Now accessed before it was set")
    request = self.request
    if request.method in self.SUPPORTED_METHODS:
        LOGGER.error("Now accessed before it was set", stacklevel=3)
    started_at = request._start_time  # pylint: disable=protected-access
    return datetime.fromtimestamp(started_at, tz=timezone.utc)

233 

234 @override 

235 async def prepare(self) -> None: 

236 """Check authorization and call self.ratelimit().""" 

237 # pylint: disable=invalid-overridden-method 

238 self.now = await self.get_time() 

239 

240 if crawler_secret := self.settings.get("CRAWLER_SECRET"): 

241 self.crawler = crawler_secret in self.request.headers.get( 

242 "User-Agent", "" 

243 ) 

244 

245 if ( 

246 self.request.method in {"GET", "HEAD"} 

247 and self.redirect_to_canonical_domain() 

248 ): 

249 return 

250 

251 if self.request.method != "OPTIONS" and not await self.ratelimit(True): 

252 await self.ratelimit() 

253 

async def ratelimit(self, global_ratelimit: bool = False) -> bool:
    """
    Apply a ratelimit to the current request using Redis.

    Nothing is limited (False is returned) if ratelimits are disabled,
    for OPTIONS requests, for requests holding Permission.RATELIMITS and
    for crawlers. With ``global_ratelimit`` the fixed global limit is
    used, otherwise the ``RATELIMIT_<METHOD>_*`` attributes of the
    handler (HEAD shares the GET values and takes no tokens).

    The headers returned by the limiter are always set. When the request
    is ratelimited an error page is written: 420 on April 20th and 429
    on any other day. Returns whether the request was ratelimited.

    Raises HTTPError(503) if Redis is not available.
    """
    exempt = (
        not self.settings.get("RATELIMITS")
        or self.request.method == "OPTIONS"
        or self.is_authorized(Permission.RATELIMITS)
        or self.crawler
    )
    if exempt:
        return False

    if not EVENT_REDIS.is_set():
        LOGGER.warning(
            "Ratelimits are enabled, but Redis is not available. "
            "This can happen shortly after starting the website."
        )
        raise HTTPError(503)

    if global_ratelimit:
        ratelimited, headers = await ratelimit(
            self.redis,
            self.redis_prefix,
            str(self.request.remote_ip),
            bucket=None,
            max_burst=99,  # limit = 100
            count_per_period=20,  # 20 requests per second
            period=1,
            tokens=10 if self.settings.get("UNDER_ATTACK") else 1,
        )
    else:
        method = self.request.method
        if method == "HEAD":
            method = "GET"
        limit = getattr(self, f"RATELIMIT_{method}_LIMIT", 0)
        if not limit:
            return False
        default_bucket = self.__class__.__name__.lower()
        ratelimited, headers = await ratelimit(
            self.redis,
            self.redis_prefix,
            str(self.request.remote_ip),
            bucket=getattr(self, f"RATELIMIT_{method}_BUCKET", default_bucket),
            max_burst=limit - 1,
            # request count per period
            count_per_period=getattr(
                self, f"RATELIMIT_{method}_COUNT_PER_PERIOD", 30
            ),
            # period in seconds
            period=getattr(self, f"RATELIMIT_{method}_PERIOD", 60),
            tokens=0 if self.request.method == "HEAD" else 1,
        )

    for header, value in headers.items():
        self.set_header(header, value)

    if ratelimited:
        on_april_20th = self.now.date() == date(self.now.year, 4, 20)
        status = 420 if on_april_20th else 429
        self.set_status(status)
        self.write_error(status)

    return ratelimited

323 

def redirect_to_canonical_domain(self) -> bool:
    """
    Permanently redirect to the domain of the ``DOMAIN`` setting.

    No redirect happens (False is returned) if no domain is configured,
    the request has no Host header, the host already is the domain, the
    host ends with ``.onion`` or ``.i2p``, or the path consists of only
    characters from U+2800 to U+28FF. The port of the Host header is
    kept in the redirect target. Returns True if a redirect was issued.
    """
    domain = self.settings.get("DOMAIN")
    if not domain:
        return False

    request = self.request
    host_header = request.headers.get("Host")
    if not host_header:
        return False

    host_name = request.host_name
    if host_name == domain or host_name.endswith((".onion", ".i2p")):
        return False
    if regex.fullmatch(r"/[\u2800-\u28FF]+/?", request.path):
        return False

    port = urlsplit(f"//{host_header}").port
    netloc = f"{domain}:{port}" if port else domain
    target = urlsplit(request.full_url())._replace(netloc=netloc)
    self.redirect(target.geturl(), permanent=True)
    return True

342 

@property
def redis(self) -> Redis[str]:
    """
    Return the value of the ``REDIS`` setting.

    The value is only cast, not checked: it is None when the setting
    is absent (i.e. Redis is not enabled).
    """
    client = self.settings.get("REDIS")
    return cast("Redis[str]", client)

351 

@property
def redis_prefix(self) -> str:
    """Return the ``REDIS_PREFIX`` setting, defaulting to NAME."""
    prefix = self.settings.get("REDIS_PREFIX", NAME)
    return prefix  # type: ignore[no-any-return]

358 

359 

360class BaseRequestHandler(_RequestHandler): 

361 """The base request handler used by every page and API.""" 

362 

363 # pylint: disable=too-many-instance-attributes, too-many-public-methods 

364 

365 ELASTIC_RUM_URL: ClassVar[str] = ( 

366 f"/@apm-rum/elastic-apm-rum.umd{'' if sys.flags.dev_mode else '.min'}.js" 

367 "?v=5.12.0" 

368 ) 

369 

370 COMPUTE_ETAG: ClassVar[bool] = True 

371 ALLOW_COMPRESSION: ClassVar[bool] = True 

372 MAX_BODY_SIZE: ClassVar[None | int] = None 

373 ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",) 

374 POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = () 

375 

376 module_info: ModuleInfo 

377 # info about page, can be overridden in module_info 

378 title: str = "Das Asoziale Netzwerk" 

379 short_title: str = "Asoziales Netzwerk" 

380 description: str = "Die tolle Webseite des Asozialen Netzwerks" 

381 

382 used_render: bool = False 

383 

384 active_origin_trials: set[str] 

385 content_type: None | str = None 

386 apm_script: None | str 

387 nonce: str 

388 

389 def _finish( 

390 self, chunk: None | str | bytes | dict[str, Any] = None 

391 ) -> Future[None]: 

392 if self._finished: 

393 raise RuntimeError("finish() called twice") 

394 

395 if chunk is not None: 

396 self.write(chunk) 

397 

398 if ( # pylint: disable=too-many-boolean-expressions 

399 (content_type := self.content_type) 

400 and ( 

401 content_type in TEXT_CONTENT_TYPES 

402 or content_type.startswith("text/") 

403 or content_type.endswith(("+xml", "+json")) 

404 ) 

405 and self._write_buffer 

406 and not self._write_buffer[-1].endswith(b"\n") 

407 ): 

408 self.write(b"\n") 

409 

410 return super().finish() 

411 

412 @override 

413 def compute_etag(self) -> None | str: 

414 """Compute ETag with Base85 encoding.""" 

415 if not self.COMPUTE_ETAG: 

416 return None 

417 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907 

418 

419 @override 

420 def decode_argument( # noqa: D102 

421 self, value: bytes, name: str | None = None 

422 ) -> str: 

423 try: 

424 return value.decode("UTF-8", "replace") 

425 except UnicodeDecodeError as exc: 

426 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}" 

427 LOGGER.exception(err_msg, exc_info=exc) 

428 raise HTTPError(400, err_msg) from exc 

429 

@property
def dump(self) -> Callable[[Any], str | bytes]:
    """
    Return the function that serialises the output.

    JSON content types are dumped as JSON (indented if the ``pretty``
    argument is true). YAML is dumped as YAML with the width taken from
    the ``yaml_width`` argument (at least 80), except on April 1st when
    JSON is produced instead. For any other content type the returned
    function hands its input back unchanged.
    """
    content_type = self.content_type
    as_json = content_type in {
        "application/json",
        "application/vnd.asozial.dynload+json",
    }

    if content_type == "application/yaml":
        now = self.now
        if (now.day, now.month) != (1, 4):
            return lambda spam: yaml.dump(
                spam,
                width=self.get_int_argument("yaml_width", 80, min_=80),
            )
        as_json = True

    if not as_json:
        return lambda spam: spam

    option = ORJSON_OPTIONS
    if self.get_bool_argument("pretty", False):
        option |= json.OPT_INDENT_2
    return lambda spam: json.dumps(spam, option=option)

454 

455 @override 

456 def finish( # noqa: D102 

457 self, chunk: None | str | bytes | dict[Any, Any] = None 

458 ) -> Future[None]: 

459 as_json = self.content_type == "application/vnd.asozial.dynload+json" 

460 as_plain_text = self.content_type == "text/plain" 

461 as_markdown = self.content_type == "text/markdown" 

462 

463 if ( 

464 not isinstance(chunk, bytes | str) 

465 or self.content_type == "text/html" 

466 or not self.used_render 

467 or not (as_json or as_plain_text or as_markdown) 

468 ): 

469 return self._finish(chunk) 

470 

471 chunk = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk 

472 

473 if as_markdown: 

474 return self._finish( 

475 f"# {self.title}\n\n" 

476 + html2text.html2text(chunk, self.request.full_url()).strip() 

477 ) 

478 

479 soup = BeautifulSoup(chunk, features="lxml") 

480 

481 if as_plain_text: 

482 return self._finish(soup.get_text("\n", True)) 

483 

484 dictionary: dict[str, object] = { 

485 "url": self.fix_url(), 

486 "title": self.title, 

487 "short_title": ( 

488 self.short_title if self.title != self.short_title else None 

489 ), 

490 "body": "".join( 

491 str(element) 

492 for element in soup.find_all(name="main")[0].contents 

493 ).strip(), 

494 "scripts": [ 

495 {"script": script.string} | script.attrs 

496 for script in soup.find_all("script") 

497 ], 

498 "stylesheets": [ 

499 stylesheet.get("href").strip() 

500 for stylesheet in soup.find_all("link", rel="stylesheet") 

501 ], 

502 "css": "\n".join(style.string for style in soup.find_all("style")), 

503 } 

504 

505 return self._finish(dictionary) 

506 

507 finish.__doc__ = _RequestHandler.finish.__doc__ 

508 

def finish_dict(self, **kwargs: Any) -> Future[None]:
    """Finish the request with the keyword arguments as a dictionary."""
    payload = kwargs
    return self.finish(payload)

512 

def fix_url(
    self,
    url: None | str | SplitResult = None,
    new_path: None | str = None,
    **query_args: None | str | bool | float,
) -> str:
    """
    Fix a URL and return it.

    Without ``url`` the URL of the current request is used. A URL on
    another host is returned unchanged unless the user wants to be asked
    before leaving and the redirect module is loaded; in that case a
    link to ``/redirect`` with the URL as ``to`` argument is built.

    For URLs on this host the path (or ``new_path``) is normalised to
    lower case (``/lolwut`` to upper case) and the user settings are
    added as query arguments. Settings whose value equals the value
    that is already saved get None as value; for static files every
    setting gets None.
    """
    request = self.request
    if url is None:
        url = request.full_url()
    parts = urlsplit(url) if isinstance(url, str) else url

    is_external = bool(parts.netloc) and (
        parts.netloc.lower() != request.host.lower()
    )
    if is_external:
        ask_first = self.user_settings.ask_before_leaving and self.settings.get(
            "REDIRECT_MODULE_LOADED"
        )
        if not ask_first:
            return parts.geturl()
        path = "/redirect"
        query_args["to"] = parts.geturl()
        parts = urlsplit(request.full_url())
    elif new_path is None:
        path = parts.path
    else:
        path = new_path

    path = f"/{path.strip('/')}".lower()
    if path == "/lolwut":
        path = path.upper()

    if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
        query_args.update(
            dict.fromkeys(self.user_settings.iter_option_names())
        )
    else:
        current = self.user_settings.as_dict_with_str_values()
        for key, value in current.items():
            query_args.setdefault(key, value)
        saved = self.user_settings.as_dict_with_str_values(
            include_query_argument=False,
            include_body_argument=request.path == "/einstellungen"
            and self.get_bool_argument("save_in_cookie", False),
        )
        for key, value in saved.items():
            if value == query_args[key]:
                query_args[key] = None

    base_url = urlunsplit(
        (
            request.protocol,
            request.host,
            "" if path == "/" else path,
            parts.query,
            parts.fragment,
        )
    )
    return add_args_to_url(base_url, **query_args)

573 

@classmethod
def get_allowed_methods(cls) -> list[str]:
    """
    Return the sorted list of allowed methods.

    OPTIONS is always included; HEAD is included if GET is allowed and
    the class supports HEAD.
    """
    allowed = set(cls.ALLOWED_METHODS)
    allowed.add("OPTIONS")
    if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
        allowed.add("HEAD")
    return sorted(allowed)

581 

def get_bool_argument(
    self,
    name: str,
    default: None | bool = None,
) -> bool:
    """
    Get an argument parsed as boolean.

    With a default, a missing argument is parsed as empty string and
    the default is passed on to str_to_bool. Without a default the
    argument is required and an HTTPError(400) is raised if it cannot
    be parsed.
    """
    if default is None:
        value = str(self.get_argument(name))
        try:
            return str_to_bool(value)
        except ValueError as err:
            raise HTTPError(400, f"{value} is not a boolean") from err
    return str_to_bool(self.get_argument(name, ""), default)

595 

def get_display_scheme(self) -> ColourScheme:
    """
    Get the scheme currently displayed.

    The "random" scheme resolves to "dark" if the microsecond of ``now``
    is odd and to "light" if it is even.
    """
    scheme = self.user_settings.scheme
    if scheme != "random":
        return scheme
    return "dark" if self.now.microsecond & 1 else "light"

602 

def get_display_theme(self) -> str:
    """
    Get the theme currently displayed.

    The "default" theme becomes "christmas" in December. The "random"
    theme resolves to a random entry of THEMES that is neither "random"
    nor "christmas". Any other theme is returned as is.
    """
    theme = self.user_settings.theme

    if theme == "default" and self.now.month == 12:
        return "christmas"

    if theme == "random":
        excluded = ("random", "christmas")
        candidates = tuple(name for name in THEMES if name not in excluded)
        return random_choice(candidates)  # nosec: B311

    return theme

618 

def get_error_message(self, **kwargs: Any) -> str:
    """
    Get the error message and return it.

    For exceptions that are not HTTPErrors the full traceback is
    returned if the ``serve_traceback`` setting is true or the request
    holds Permission.TRACEBACK; otherwise only the exception itself is
    formatted. For a MissingArgumentError its log message is returned.
    In every other case the reason of the response is returned.
    """
    if "exc_info" not in kwargs:
        return str(self._reason)

    exc_info = kwargs["exc_info"]
    exc_type = exc_info[0]

    if not issubclass(exc_type, HTTPError):
        if self.settings.get("serve_traceback") or self.is_authorized(
            Permission.TRACEBACK
        ):
            lines = traceback.format_exception(*exc_info)
        else:
            lines = traceback.format_exception_only(*exc_info[:2])
        return "".join(lines).strip()

    if issubclass(exc_type, MissingArgumentError):
        return cast(str, exc_info[1].log_message)

    return str(self._reason)

643 

def get_error_page_description(self, status_code: int) -> str:
    """
    Get the description for the error page.

    404 and 451 have their own texts, every other code gets the text of
    its status class (1xx to 5xx). Raises ValueError for codes outside
    of 100-599.
    """
    # https://developer.mozilla.org/docs/Web/HTTP/Status
    if status_code == 404:
        return f"{self.request.path} wurde nicht gefunden."
    if status_code == 451:
        return "Hier wäre bestimmt geiler Scheiß."

    by_status_class = {
        1: "Hier gibt es eine total wichtige Information.",
        2: "Hier ist alles super! 🎶🎶",
        3: "Eine Umleitung ist eingerichtet.",
        4: "Ein Client-Fehler ist aufgetreten.",
        5: "Ein Server-Fehler ist aufgetreten.",
    }
    description = by_status_class.get(status_code // 100)
    if description is None:
        raise ValueError(
            f"{status_code} is not a valid HTTP response status code."
        )
    return description

665 

def get_int_argument(
    self,
    name: str,
    default: None | int = None,
    *,
    max_: None | int = None,
    min_: None | int = None,
) -> int:
    """
    Get an argument parsed as integer.

    The value is parsed with ``int(..., base=0)``, so prefixes like
    ``0x`` are understood. Without a default the argument is required
    and an HTTPError(400) is raised if it is not an integer. With a
    default, a missing, empty or unparsable argument yields the default.
    The result is capped at ``max_`` first and then raised to ``min_``.
    """
    if default is None:
        raw = self.get_argument(name)
        try:
            value = int(raw, base=0)
        except ValueError as err:
            raise HTTPError(400, f"{raw} is not an integer") from err
    else:
        value = default
        raw = self.get_argument(name, "")
        if raw:
            with contextlib.suppress(ValueError):
                value = int(raw, base=0)

    if max_ is not None:
        value = min(max_, value)
    if min_ is not None:
        value = max(min_, value)

    return value

695 

def get_module_infos(self) -> tuple[ModuleInfo, ...]:
    """Return the ``MODULE_INFOS`` setting or an empty tuple if unset."""
    infos = self.settings.get("MODULE_INFOS")
    return infos if infos else ()

699 

def get_reporting_api_endpoint(self) -> None | str:
    """
    Get the endpoint for the Reporting API.

    None is returned if the ``REPORTING`` setting is falsy. An endpoint
    starting with ``/`` is turned into an absolute URL using protocol
    and host of the current request; any other value of the
    ``REPORTING_ENDPOINT`` setting is returned unchanged.
    """
    settings = self.settings
    if not settings.get("REPORTING"):
        return None

    endpoint = settings.get("REPORTING_ENDPOINT")
    if endpoint and endpoint.startswith("/"):
        request = self.request
        return f"{request.protocol}://{request.host}{endpoint}"
    return endpoint

710 

@override
def get_template_namespace(self) -> dict[str, Any]:
    """
    Add useful things to the template namespace and return it.

    On top of the namespace of the parent class this adds the user
    settings, helper functions (``ansi2html``, ``emoji2html``,
    ``fix_static``, ``fix_url``), page information (title, description,
    keywords, canonical URL, theme and scheme) and a few date-dependent
    flags.
    """
    namespace = super().get_template_namespace()
    convert_ansi = partial(
        Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
    )
    namespace.update(self.user_settings.as_dict())

    # The input is passed through the converter first and the two
    # replace helpers afterwards.
    ansi_pipeline = partial(
        reduce, apply, (convert_ansi, ansi_replace, backspace_replace)
    )

    if self.apm_enabled:
        apm_script = self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
    else:
        apm_script = None

    as_html = self.content_type == "text/html"

    # True on April 1st or if the "c" cookie holds a true value.
    c_flag = self.now.date() == date(self.now.year, 4, 1) or str_to_bool(
        self.get_cookie("c", "f") or "f", False
    )

    # The canonical URL is lower case, except for /LOLWUT pages.
    if self.request.path.upper().startswith("/LOLWUT"):
        cased_url = self.request.full_url().upper()
    else:
        cased_url = self.request.full_url().lower()
    canonical_url = self.fix_url(cased_url).split("?")[0]

    display_theme = self.get_display_theme()
    display_scheme = self.get_display_scheme()

    def fix_static(path):  # type: ignore[no-untyped-def]
        return self.fix_url(fix_static_path(path))

    def emoji_with_class(emoji):  # type: ignore[no-untyped-def]
        return f'<span class="openmoji">{emoji}</span>'

    def emoji_plain(emoji):  # type: ignore[no-untyped-def]
        return f"<span>{emoji}</span>"

    if self.user_settings.openmoji == "img":
        emoji_renderer = emoji2html
    elif self.user_settings.openmoji:
        emoji_renderer = emoji_with_class
    else:
        emoji_renderer = emoji_plain

    form_appendix = self.user_settings.get_form_appendix()

    keywords = "Asoziales Netzwerk, Känguru-Chroniken"
    if self.module_info:  # type: ignore[truthy-bool]
        keywords += (
            f", {self.module_info.get_keywords_as_str(self.request.path)}"
        )

    namespace.update(
        {
            "ansi2html": ansi_pipeline,
            "apm_script": apm_script,
            "as_html": as_html,
            "c": c_flag,
            "canonical_url": canonical_url,
            "description": self.description,
            "display_theme": display_theme,
            "display_scheme": display_scheme,
            "elastic_rum_url": self.ELASTIC_RUM_URL,
            "fix_static": fix_static,
            "fix_url": self.fix_url,
            "emoji2html": emoji_renderer,
            "form_appendix": form_appendix,
            "GH_ORG_URL": GH_ORG_URL,
            "GH_PAGES_URL": GH_PAGES_URL,
            "GH_REPO_URL": GH_REPO_URL,
            "keywords": keywords,
            "lang": "de",  # TODO: add language support
            "nonce": self.nonce,
            "now": self.now,
            "openmoji_version": OPENMOJI_VERSION,
            "settings": self.settings,
            "short_title": self.short_title,
            "testing": pytest_is_running(),
            "title": self.title,
            # True on Easter Sunday and the day after it.
            "🥚": timedelta()
            <= self.now.date() - easter(self.now.year)
            < timedelta(days=2),
            "🦘": is_prime(self.now.microsecond),
        }
    )
    return namespace

784 

def get_user_id(self) -> str:
    """
    Get the user id saved in the cookie or create one.

    A cookie of up to 90 days of age is accepted; without one a new
    UUID4 is generated. The cookie is (re)set with 90 days of expiry
    if there is no cookie younger than 30 days.
    """
    stored = self.get_secure_cookie("user_id", max_age_days=90, min_version=2)
    if stored:
        user_id = stored.decode("UTF-8")
    else:
        user_id = str(uuid.uuid4())

    # save it in cookie or reset expiry date
    is_fresh = self.get_secure_cookie(
        "user_id", max_age_days=30, min_version=2
    )
    if not is_fresh:
        self.set_secure_cookie(
            "user_id",
            user_id,
            expires_days=90,
            path="/",
            samesite="Strict",
        )

    return user_id

807 

def handle_accept_header(  # pylint: disable=inconsistent-return-statements
    self, possible_content_types: tuple[str, ...], strict: bool = True
) -> None:
    """
    Handle the Accept header and set `self.content_type`.

    Nothing happens without possible content types. A missing or empty
    Accept header counts as ``*/*``. If nothing matches, strict mode
    answers with handle_not_acceptable(); otherwise the first possible
    content type is used. The Content-Type header is set afterwards.
    """
    if not possible_content_types:
        return

    accept = self.request.headers.get("Accept") or "*/*"
    best_match = get_best_match(accept, possible_content_types)

    if best_match is None:
        if strict:
            return self.handle_not_acceptable(possible_content_types)
        best_match = possible_content_types[0]

    self.content_type = best_match
    self.set_content_type_header()

824 

def handle_not_acceptable(
    self, possible_content_types: tuple[str, ...]
) -> None:
    """
    Answer with 406 and the list of possible content types.

    Only call this if we cannot respect the Accept header. The
    Content-Type header is cleared and the request is ended by raising
    Finish with one content type per line as body.
    """
    self.clear_header("Content-Type")
    self.set_status(406)
    body = "\n".join(possible_content_types) + "\n"
    raise Finish(body)

832 

def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
    """
    Handle HEAD requests by calling ``get`` with ``head=True``.

    Raises HTTPError(405) if ``get`` is still the one from tornado.web
    and HTTPError(501) if the handler does not support HEAD.
    """
    if self.get.__module__ == "tornado.web":
        raise HTTPError(405)
    if not self.supports_head():
        raise HTTPError(501)

    return self.get(*args, **{**kwargs, "head": True})

842 

843 @override 

844 def initialize( 

845 self, 

846 *, 

847 module_info: ModuleInfo, 

848 # default is true, because then empty args dicts are 

849 # enough to specify that the defaults should be used 

850 default_title: bool = True, 

851 default_description: bool = True, 

852 ) -> None: 

853 """ 

854 Get title and description from the kwargs. 

855 

856 If title and description are present in the kwargs, 

857 then they override self.title and self.description. 

858 """ 

859 self.module_info = module_info 

860 if not default_title: 

861 page_info = self.module_info.get_page_info(self.request.path) 

862 self.title = page_info.name 

863 self.short_title = page_info.short_name or self.title 

864 if not default_description: 

865 self.description = self.module_info.get_page_info( 

866 self.request.path 

867 ).description 

868 

869 @override 

870 async def options(self, *args: Any, **kwargs: Any) -> None: 

871 """Handle OPTIONS requests.""" 

872 # pylint: disable=unused-argument 

873 self.set_header("Allow", ", ".join(self.get_allowed_methods())) 

874 self.set_status(204) 

875 await self.finish() 

876 

def origin_trial(self, token: bytes | str) -> bool:
    """
    Enable an experimental feature.

    The payload is the JSON found after the first 69 bytes of the
    Base64-decoded token. True is returned if the feature is already
    active or the token is valid for this request; in the latter case
    the token is sent as ``Origin-Trial`` header. A token is rejected
    if it expired before the request started, the scheme differs, or
    the host (with the default port added) neither equals the origin
    of the token nor is a permitted subdomain of it.
    """
    # pylint: disable=protected-access
    payload = json.loads(b64decode(token)[69:])
    feature = payload["feature"]
    if feature in self.active_origin_trials:
        return True

    origin = urlsplit(payload["origin"])
    url = urlsplit(self.request.full_url())
    if url.port is None and url.scheme in {"http", "https"}:
        default_port = 443 if url.scheme == "https" else 80
        url = url._replace(netloc=f"{url.hostname}:{default_port}")

    expired = self.request._start_time > payload["expiry"]
    if expired or url.scheme != origin.scheme:
        return False

    host_matches = url.netloc == origin.netloc or (
        payload.get("isSubdomain")
        and url.netloc.endswith(f".{origin.netloc}")
    )
    if not host_matches:
        return False

    self.add_header("Origin-Trial", token)
    self.active_origin_trials.add(feature)
    return True

901 

@override
async def prepare(self) -> None:
    """
    Run the parent checks and prepare the response.

    Nothing more happens if the parent already finished the request.
    Otherwise gzip is disabled if compression is not allowed, the
    Accept header is handled, GET requests occasionally get the "c"
    cookie, and HTTPError(413) is raised if the body of a non-OPTIONS
    request is larger than MAX_BODY_SIZE.
    """
    await super().prepare()

    if self._finished:
        return

    if not self.ALLOW_COMPRESSION:
        gzip_transforms = (
            transform
            for transform in self._transforms
            if isinstance(transform, GZipContentEncoding)
        )
        for transform in gzip_transforms:
            # pylint: disable=protected-access
            transform._gzipping = False

    self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)

    if self.request.method == "GET":
        # The number is seeded with the request time.
        days = Random(self.now.timestamp()).randint(0, 31337)
        if days in {69, 420, 1337, 31337}:
            self.set_cookie("c", "s", expires_days=days / 24, path="/")

    max_size = self.MAX_BODY_SIZE
    if self.request.method != "OPTIONS" and max_size is not None:
        body_size = len(self.request.body)
        if body_size > max_size:
            LOGGER.warning("%s > MAX_BODY_SIZE (%s)", body_size, max_size)
            raise HTTPError(413)

939 

@override
def render(  # noqa: D102
    self, template_name: str, **kwargs: Any
) -> Future[None]:
    # Remember that the output was rendered from a template; finish()
    # checks this flag before converting the output.
    self.used_render = True
    rendered = super().render(template_name, **kwargs)
    return rendered

946 

947 render.__doc__ = _RequestHandler.render.__doc__ 

948 

def set_content_type_header(self) -> None:
    """
    Set the Content-Type header based on `self.content_type`.

    ``text/*`` types get ``;charset=utf-8`` appended. Nothing is set
    if the content type is None.
    """
    content_type = self.content_type
    if content_type is None:
        return

    header_value = content_type
    if str(content_type).startswith("text/"):  # RFC 2616 (3.7.1)
        header_value = f"{content_type};charset=utf-8"
    self.set_header("Content-Type", header_value)

957 

@override
def set_cookie(  # noqa: D102 # pylint: disable=too-many-arguments
    self,
    name: str,
    value: str | bytes,
    domain: None | str = None,
    expires: None | float | tuple[int, ...] | datetime = None,
    path: str = "/",
    expires_days: None | float = 400,  # changed
    *,
    secure: bool | None = None,
    httponly: bool = True,
    **kwargs: Any,
) -> None:
    # default for same site should be strict
    kwargs.setdefault("samesite", "Strict")

    # Without an explicit value the cookie is secure on HTTPS requests.
    if secure is None:
        use_secure = self.request.protocol == "https"
    else:
        use_secure = secure

    super().set_cookie(
        name,
        value,
        domain,
        expires,
        path,
        expires_days,
        secure=use_secure,
        httponly=httponly,
        **kwargs,
    )

989 

990 set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__ 

991 

def set_csp_header(self) -> None:
    """
    Set the Content-Security-Policy header.

    A fresh nonce is generated and stored in ``self.nonce``. If APM is
    enabled, the hash of the inline script and the RUM/APM server URL
    are allowed as well. The WebSocket origin of the current host is
    always allowed for connections. ``report-uri`` is only added if
    the ``REPORTING`` setting is truthy.
    """
    self.nonce = secrets.token_urlsafe(16)

    script_src = ["'self'", f"'nonce-{self.nonce}'"]
    if (
        self.apm_enabled
        and "INLINE_SCRIPT_HASH" in self.settings["ELASTIC_APM"]
    ):
        script_hash = self.settings["ELASTIC_APM"]["INLINE_SCRIPT_HASH"]
        script_src.append(f"'sha256-{script_hash}'")
        # for browsers that don't support hash
        script_src.append("'unsafe-inline'")

    connect_src = ["'self'"]
    if self.apm_enabled and "SERVER_URL" in self.settings["ELASTIC_APM"]:
        rum_server_url = self.settings["ELASTIC_APM"].get("RUM_SERVER_URL")
        if rum_server_url:
            # the RUM agent needs to connect to rum_server_url
            connect_src.append(rum_server_url)
        elif rum_server_url is None:
            # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
            connect_src.append(self.settings["ELASTIC_APM"]["SERVER_URL"])

    # fix for older browsers
    websocket_scheme = "wss" if self.request.protocol == "https" else "ws"
    connect_src.append(f"{websocket_scheme}://{self.request.host}")

    directives = [
        "default-src 'self'",
        f"script-src {' '.join(script_src)}",
        f"connect-src {' '.join(connect_src)}",
        "style-src 'self' 'unsafe-inline'",
        "img-src 'self' https://img.zeit.de https://github.asozial.org",
        "frame-ancestors 'self'",
        "sandbox allow-downloads allow-same-origin allow-modals"
        " allow-popups-to-escape-sandbox allow-scripts allow-popups"
        " allow-top-navigation-by-user-activation allow-forms",
        "report-to default",
        "base-uri 'none'",
    ]
    if self.settings.get("REPORTING"):
        directives.append(f"report-uri {self.get_reporting_api_endpoint()}")

    self.set_header(
        "Content-Security-Policy",
        "".join(f"{directive};" for directive in directives),
    )

1044 

@override
def set_default_headers(self) -> None:
    """Set the headers every response of this handler starts out with.

    Sets the Content-Security-Policy through `set_csp_header`, resets
    `self.active_origin_trials` to an empty set and then sets reporting,
    CORS, cross-origin isolation, permissions, referrer and ``Vary``
    headers.  Several headers depend on the ``REPORTING``, ``HSTS``,
    ``ONION_ADDRESS`` and ``debug`` settings.
    """
    self.set_csp_header()
    self.active_origin_trials = set()
    if self.settings.get("REPORTING"):
        # Announce the reporting endpoint in both header formats
        # (Reporting-Endpoints and Report-To) under the group "default".
        endpoint = self.get_reporting_api_endpoint()
        self.set_header(
            "Reporting-Endpoints",
            f'default="{endpoint}"',  # noqa: B907
        )
        self.set_header(
            "Report-To",
            json.dumps(
                {
                    "group": "default",
                    "max_age": 2592000,
                    "endpoints": [{"url": endpoint}],
                },
                option=ORJSON_OPTIONS,
            ),
        )
        # NOTE(review): NEL refers to the "default" group declared just
        # above, so it is kept inside the REPORTING branch -- confirm.
        self.set_header("NEL", '{"report_to":"default","max_age":2592000}')
    self.set_header("X-Content-Type-Options", "nosniff")
    # CORS: any origin and any request header are allowed; the allowed
    # methods come from get_allowed_methods().
    self.set_header("Access-Control-Max-Age", "7200")
    self.set_header("Access-Control-Allow-Origin", "*")
    self.set_header("Access-Control-Allow-Headers", "*")
    self.set_header(
        "Access-Control-Allow-Methods",
        ", ".join(self.get_allowed_methods()),
    )
    self.set_header("Cross-Origin-Resource-Policy", "cross-origin")
    # Each listed feature gets an empty allowlist.
    self.set_header(
        "Permissions-Policy",
        "browsing-topics=(),"
        "identity-credentials-get=(),"
        "join-ad-interest-group=(),"
        "private-state-token-issuance=(),"
        "private-state-token-redemption=(),"
        "run-ad-auction=()",
    )
    self.set_header("Referrer-Policy", "same-origin")
    self.set_header(
        "Cross-Origin-Opener-Policy", "same-origin; report-to=default"
    )
    # Only the path "/kaenguru-comics-alt" gets the "credentialless"
    # embedder policy; every other path gets "require-corp".
    if self.request.path == "/kaenguru-comics-alt":  # TODO: improve this
        self.set_header(
            "Cross-Origin-Embedder-Policy",
            "credentialless; report-to=default",
        )
    else:
        self.set_header(
            "Cross-Origin-Embedder-Policy",
            "require-corp; report-to=default",
        )
    if self.settings.get("HSTS"):
        self.set_header("Strict-Transport-Security", "max-age=63072000")
    # Point to the same path and query on the configured onion address,
    # unless the request host already ends with ".onion".
    if (
        onion_address := self.settings.get("ONION_ADDRESS")
    ) and not self.request.host_name.endswith(".onion"):
        self.set_header(
            "Onion-Location",
            onion_address
            + self.request.path
            + (f"?{self.request.query}" if self.request.query else ""),
        )
    if self.settings.get("debug"):
        self.set_header("X-Debug", bool_to_str(True))
        # NOTE(review): the per-permission headers are assumed to be
        # debug-only (nested under the "debug" check) -- confirm.
        for permission in Permission:
            # members without a name are skipped
            if permission.name:
                self.set_header(
                    f"X-Permission-{permission.name}",
                    bool_to_str(bool(self.is_authorized(permission))),
                )
    self.set_header("Vary", "Accept, Authorization, Cookie")

1120 

1121 set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__ 

1122 

@classmethod
def supports_head(cls) -> bool:
    """Tell whether the ``get`` method of this handler can serve HEAD.

    That is the case exactly when ``cls.get`` declares a keyword-only
    parameter named ``head``.
    """
    head_parameter = inspect.signature(cls.get).parameters.get("head")
    if head_parameter is None:
        return False
    return head_parameter.kind == inspect.Parameter.KEYWORD_ONLY

1132 

@cached_property
def user_settings(self) -> Options:
    """Return the `Options` object holding the user settings.

    It is built from this handler on first access; `cached_property`
    then keeps returning that same object.
    """
    options = Options(self)
    return options

1137 

@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:  # noqa: D102
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    self.set_content_type_header()

    # dicts are serialized by self.dump() before being written
    if isinstance(chunk, dict):
        chunk = self.dump(chunk)

    now = self.now
    if now.date() == date(now.year, 4, 27):
        # On 27 April some capitalized words are replaced with "Stanley".
        if isinstance(chunk, bytes):
            # bytes that are not valid UTF-8 are written unchanged
            with contextlib.suppress(UnicodeDecodeError):
                chunk = chunk.decode("UTF-8")
        if isinstance(chunk, str):
            wanted_remainder = now.year % 5

            def stanley_or_original(match: regex.Match[str]) -> str:
                # Seeding Random with the word itself makes the choice
                # depend only on the word and the current year.
                word = match[0]
                if Random(word).randrange(5) == wanted_remainder:
                    return "Stanley"
                return word

            # one uppercase letter followed by at least four lowercase ones
            chunk = regex.sub(
                r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b", stanley_or_original, chunk
            )

    super().write(chunk)

1164 

1165 write.__doc__ = _RequestHandler.write.__doc__ 

1166 

1167 @override 

1168 def write_error(self, status_code: int, **kwargs: Any) -> None: 

1169 """Render the error page.""" 

1170 dict_content_types: tuple[str, str] = ( 

1171 "application/json", 

1172 "application/yaml", 

1173 ) 

1174 all_error_content_types: tuple[str, ...] = ( 

1175 # text/plain as first (default), to not screw up output in terminals 

1176 "text/plain", 

1177 "text/html", 

1178 "text/markdown", 

1179 *dict_content_types, 

1180 "application/vnd.asozial.dynload+json", 

1181 ) 

1182 

1183 if self.content_type not in all_error_content_types: 

1184 # don't send 406, instead default with text/plain 

1185 self.handle_accept_header(all_error_content_types, strict=False) 

1186 

1187 if self.content_type == "text/html": 

1188 self.render( # type: ignore[unused-awaitable] 

1189 "error.html", 

1190 status=status_code, 

1191 reason=self.get_error_message(**kwargs), 

1192 description=self.get_error_page_description(status_code), 

1193 is_traceback="exc_info" in kwargs 

1194 and not issubclass(kwargs["exc_info"][0], HTTPError) 

1195 and ( 

1196 self.settings.get("serve_traceback") 

1197 or self.is_authorized(Permission.TRACEBACK) 

1198 ), 

1199 ) 

1200 return 

1201 

1202 if self.content_type in dict_content_types: 

1203 self.finish( # type: ignore[unused-awaitable] 

1204 { 

1205 "status": status_code, 

1206 "reason": self.get_error_message(**kwargs), 

1207 } 

1208 ) 

1209 return 

1210 

1211 self.finish( # type: ignore[unused-awaitable] 

1212 f"{status_code} {self.get_error_message(**kwargs)}\n" 

1213 ) 

1214 

1215 write_error.__doc__ = _RequestHandler.write_error.__doc__