Coverage for an_website/utils/base_request_handler.py: 78.659%

492 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-10-04 17:54 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13# pylint: disable=too-many-lines 

14 

15""" 

16The base request handler used by other modules. 

17 

18This should only contain the BaseRequestHandler class. 

19""" 

20 

21from __future__ import annotations 

22 

23import contextlib 

24import inspect 

25import logging 

26import secrets 

27import sys 

28import traceback 

29import uuid 

30from asyncio import Future 

31from base64 import b64decode 

32from collections.abc import Awaitable, Callable, Coroutine, Mapping 

33from contextvars import ContextVar 

34from datetime import date, datetime, timedelta, timezone, tzinfo 

35from functools import cached_property, partial, reduce 

36from random import Random, choice as random_choice 

37from types import TracebackType 

38from typing import Any, ClassVar, Final, cast, override 

39from urllib.parse import SplitResult, urlsplit, urlunsplit 

40from zoneinfo import ZoneInfo 

41 

42import elasticapm 

43import html2text 

44import orjson as json 

45import regex 

46import tornado.web 

47import yaml 

48from accept_types import get_best_match # type: ignore[import-untyped] 

49from ansi2html import Ansi2HTMLConverter 

50from bs4 import BeautifulSoup 

51from dateutil.easter import easter 

52from elastic_transport import ApiError, TransportError 

53from elasticsearch import AsyncElasticsearch 

54from openmoji_dist import VERSION as OPENMOJI_VERSION 

55from redis.asyncio import Redis 

56from tornado.httputil import HTTPServerRequest 

57from tornado.iostream import StreamClosedError 

58from tornado.web import ( 

59 Finish, 

60 GZipContentEncoding, 

61 HTTPError, 

62 MissingArgumentError, 

63 OutputTransform, 

64) 

65 

66from .. import ( 

67 EVENT_ELASTICSEARCH, 

68 EVENT_REDIS, 

69 GH_ORG_URL, 

70 GH_PAGES_URL, 

71 GH_REPO_URL, 

72 NAME, 

73 ORJSON_OPTIONS, 

74 pytest_is_running, 

75) 

76from .decorators import is_authorized 

77from .options import ColourScheme, Options 

78from .static_file_handling import FILE_HASHES_DICT, fix_static_path 

79from .themes import THEMES 

80from .utils import ( 

81 ModuleInfo, 

82 Permission, 

83 add_args_to_url, 

84 ansi_replace, 

85 apply, 

86 backspace_replace, 

87 bool_to_str, 

88 emoji2html, 

89 geoip, 

90 hash_bytes, 

91 is_prime, 

92 ratelimit, 

93 str_to_bool, 

94) 

95 

# logger of this module
LOGGER: Final[logging.Logger] = logging.getLogger(__name__)

# content types outside of "text/*" whose bodies are textual nonetheless;
# BaseRequestHandler._finish() ends responses of these types with a newline
TEXT_CONTENT_TYPES: Final[set[str]] = {
    "application/yaml",
    "application/xml",
    "application/x-ndjson",
    "application/vnd.asozial.dynload+json",
    "application/json",
    "application/javascript",
}

# the request that is currently executed, set by _RequestHandler._execute()
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")

108 

109 

class _RequestHandler(tornado.web.RequestHandler):
    """
    Base for Tornado request handlers.

    Gives access to the APM, Elasticsearch and Redis clients stored in
    the application settings and implements the request time lookup,
    ratelimiting and the redirect to the canonical domain.
    """

    # set in prepare(): whether the User-Agent header of the request
    # contains the value of the CRAWLER_SECRET setting
    crawler: bool = False

    @override
    async def _execute(
        self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
    ) -> None:
        # make the current request available through request_ctx_var
        request_ctx_var.set(self.request)
        return await super()._execute(transforms, *args, **kwargs)

    # pylint: disable-next=protected-access
    _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__

    @property
    def apm_client(self) -> None | elasticapm.Client:
        """Get the APM client from the settings."""
        return self.settings.get("ELASTIC_APM", {}).get("CLIENT")  # type: ignore[no-any-return]

    @property
    def apm_enabled(self) -> bool:
        """Return whether APM is enabled."""
        return bool(self.settings.get("ELASTIC_APM", {}).get("ENABLED"))

    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        pass

    data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__

    @property
    def elasticsearch(self) -> AsyncElasticsearch:
        """
        Get the Elasticsearch client from the settings.

        This is None if Elasticsearch is not enabled.
        """
        return cast(AsyncElasticsearch, self.settings.get("ELASTICSEARCH"))

    @property
    def elasticsearch_prefix(self) -> str:
        """Get the Elasticsearch prefix from the settings."""
        return self.settings.get(  # type: ignore[no-any-return]
            "ELASTICSEARCH_PREFIX", NAME
        )

    def geoip(
        self,
        ip: None | str = None,
        database: str = geoip.__defaults__[0],  # type: ignore[index]
        *,
        allow_fallback: bool = True,
    ) -> Coroutine[None, None, None | dict[str, Any]]:
        """
        Get GeoIP information.

        The IP address defaults to the remote IP of the request.
        Elasticsearch is only passed to the lookup if it is available.
        """
        if not ip:
            ip = self.request.remote_ip
        if not EVENT_ELASTICSEARCH.is_set():
            return geoip(ip, database)
        return geoip(
            ip, database, self.elasticsearch, allow_fallback=allow_fallback
        )

    async def get_time(self) -> datetime:
        """
        Get the start time of the request in the users' timezone.

        UTC is used if the GeoIP lookup fails, if it yields no timezone
        or if the timezone it yields cannot be loaded.
        """
        tz: tzinfo = timezone.utc
        try:
            geoip = await self.geoip()  # pylint: disable=redefined-outer-name
        except (ApiError, TransportError):
            LOGGER.exception("Elasticsearch request failed")
            if self.apm_client:
                self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
        else:
            if geoip and "timezone" in geoip:
                try:
                    tz = ZoneInfo(geoip["timezone"])
                except (KeyError, ValueError):
                    # ZoneInfoNotFoundError is a KeyError and malformed
                    # keys raise ValueError; a bad value in the GeoIP
                    # data must not make the whole request fail
                    LOGGER.warning(
                        "Unknown timezone %r in GeoIP data", geoip["timezone"]
                    )
        return datetime.fromtimestamp(
            self.request._start_time, tz=tz  # pylint: disable=protected-access
        )

    def is_authorized(
        self, permission: Permission, allow_cookie_auth: bool = True
    ) -> bool | None:
        """Check whether the request is authorized."""
        return is_authorized(self, permission, allow_cookie_auth)

    @override
    def log_exception(
        self,
        typ: None | type[BaseException],
        value: None | BaseException,
        tb: None | TracebackType,
    ) -> None:
        if isinstance(value, HTTPError):
            super().log_exception(typ, value, tb)
        elif typ is StreamClosedError:
            # the client went away, this is not worth more than a debug log
            LOGGER.debug(
                "Stream closed %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )
        else:
            LOGGER.error(
                "Uncaught exception %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )

    log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__

    @cached_property
    def now(self) -> datetime:
        """
        Get the current time.

        This is normally set in prepare(); the code below only runs if
        it is accessed before that and returns the start time in UTC.
        """
        # pylint: disable=method-hidden
        if pytest_is_running():
            raise AssertionError("Now accessed before it was set")
        if self.request.method in self.SUPPORTED_METHODS:
            LOGGER.error("Now accessed before it was set", stacklevel=3)
        return datetime.fromtimestamp(
            self.request._start_time,  # pylint: disable=protected-access
            tz=timezone.utc,
        )

    @override
    async def prepare(self) -> None:
        """
        Prepare the request.

        Set self.now and self.crawler, redirect GET and HEAD requests
        to the canonical domain and apply the ratelimits.
        """
        # pylint: disable=invalid-overridden-method
        self.now = await self.get_time()

        if crawler_secret := self.settings.get("CRAWLER_SECRET"):
            self.crawler = crawler_secret in self.request.headers.get(
                "User-Agent", ""
            )

        if (
            self.request.method in {"GET", "HEAD"}
            and self.redirect_to_canonical_domain()
        ):
            return

        # only check the handler specific ratelimit
        # if the request passed the global ratelimit
        if self.request.method != "OPTIONS" and not await self.ratelimit(True):
            await self.ratelimit()

    async def ratelimit(self, global_ratelimit: bool = False) -> bool:
        """
        Take b1nzy to space using Redis.

        Return whether the request got ratelimited; in that case the
        error response (420 on April 20th, otherwise 429) has already
        been written. Raise a 503 HTTPError if ratelimits are enabled,
        but Redis is not available.
        """
        if (
            not self.settings.get("RATELIMITS")
            or self.request.method == "OPTIONS"
            or self.is_authorized(Permission.RATELIMITS)
            or self.crawler
        ):
            return False

        if not EVENT_REDIS.is_set():
            LOGGER.warning(
                (
                    "Ratelimits are enabled, but Redis is not available. "
                    "This can happen shortly after starting the website."
                ),
            )
            raise HTTPError(503)

        if global_ratelimit:
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=None,
                max_burst=99,  # limit = 100
                count_per_period=20,  # 20 requests per second
                period=1,
                tokens=10 if self.settings.get("UNDER_ATTACK") else 1,
            )
        else:
            # HEAD requests share the settings of GET requests
            method = (
                "GET" if self.request.method == "HEAD" else self.request.method
            )
            if not (limit := getattr(self, f"RATELIMIT_{method}_LIMIT", 0)):
                return False
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=getattr(
                    self,
                    f"RATELIMIT_{method}_BUCKET",
                    self.__class__.__name__.lower(),
                ),
                max_burst=limit - 1,
                count_per_period=getattr(  # request count per period
                    self,
                    f"RATELIMIT_{method}_COUNT_PER_PERIOD",
                    30,
                ),
                period=getattr(
                    self, f"RATELIMIT_{method}_PERIOD", 60  # period in seconds
                ),
                # HEAD requests do not use up tokens
                tokens=1 if self.request.method != "HEAD" else 0,
            )

        for header, value in headers.items():
            self.set_header(header, value)

        if ratelimited:
            if self.now.date() == date(self.now.year, 4, 20):
                self.set_status(420)
                self.write_error(420)
            else:
                self.set_status(429)
                self.write_error(429)

        return ratelimited

    def redirect_to_canonical_domain(self) -> bool:
        """
        Redirect to the canonical domain.

        Return whether the redirect has been sent. No redirect happens
        if the DOMAIN setting or the Host header is missing, if the
        request already uses the canonical domain or a .onion/.i2p
        host or if the path consists of Braille characters only.
        """
        if (
            not (domain := self.settings.get("DOMAIN"))
            or not self.request.headers.get("Host")
            or self.request.host_name == domain
            or self.request.host_name.endswith((".onion", ".i2p"))
            or regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path)
        ):
            return False
        # keep the port of the original request
        port = urlsplit(f"//{self.request.headers['Host']}").port
        self.redirect(
            urlsplit(self.request.full_url())
            ._replace(netloc=f"{domain}:{port}" if port else domain)
            .geturl(),
            permanent=True,
        )
        return True

    @property
    def redis(self) -> Redis[str]:
        """
        Get the Redis client from the settings.

        This is None if Redis is not enabled.
        """
        return cast("Redis[str]", self.settings.get("REDIS"))

    @property
    def redis_prefix(self) -> str:
        """Get the Redis prefix from the settings."""
        return self.settings.get(  # type: ignore[no-any-return]
            "REDIS_PREFIX", NAME
        )

358 

359 

360class BaseRequestHandler(_RequestHandler): 

361 """The base request handler used by every page and API.""" 

362 

363 # pylint: disable=too-many-instance-attributes, too-many-public-methods 

364 

365 ELASTIC_RUM_URL: ClassVar[str] = ( 

366 f"/@apm-rum/elastic-apm-rum.umd{'' if sys.flags.dev_mode else '.min'}.js" 

367 "?v=5.12.0" 

368 ) 

369 

370 COMPUTE_ETAG: ClassVar[bool] = True 

371 ALLOW_COMPRESSION: ClassVar[bool] = True 

372 MAX_BODY_SIZE: ClassVar[None | int] = None 

373 ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",) 

374 POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = () 

375 

376 module_info: ModuleInfo 

377 # info about page, can be overridden in module_info 

378 title: str = "Das Asoziale Netzwerk" 

379 short_title: str = "Asoziales Netzwerk" 

380 description: str = "Die tolle Webseite des Asozialen Netzwerks" 

381 

382 used_render: bool = False 

383 

384 active_origin_trials: set[str] 

385 content_type: None | str = None 

386 apm_script: None | str 

387 nonce: str 

388 

def _finish(
    self, chunk: None | str | bytes | dict[str, Any] = None
) -> Future[None]:
    """
    Write the chunk and finish the response.

    Responses with a textual content type that do not
    end with a line feed get one appended.
    """
    if self._finished:
        raise RuntimeError("finish() called twice")

    if chunk is not None:
        self.write(chunk)

    content_type = self.content_type
    if content_type and (
        content_type in TEXT_CONTENT_TYPES
        or content_type.startswith("text/")
        or content_type.endswith(("+xml", "+json"))
    ):
        buffer = self._write_buffer
        # nothing gets appended to an empty response
        if buffer and not buffer[-1].endswith(b"\n"):
            self.write(b"\n")

    return super().finish()

411 

412 @override 

413 def compute_etag(self) -> None | str: 

414 """Compute ETag with Base85 encoding.""" 

415 if not self.COMPUTE_ETAG: 

416 return None 

417 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907 

418 

419 @override 

420 def decode_argument( # noqa: D102 

421 self, value: bytes, name: str | None = None 

422 ) -> str: 

423 try: 

424 return value.decode("UTF-8", "replace") 

425 except UnicodeDecodeError as exc: 

426 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}" 

427 LOGGER.exception(err_msg, exc_info=exc) 

428 raise HTTPError(400, err_msg) from exc 

429 

@property
def dump(self) -> Callable[[Any], str | bytes]:
    """
    Get the function for dumping the output.

    JSON content types get serialized with orjson, YAML with PyYAML
    and everything else is passed through unchanged.
    """
    content_type = self.content_type
    as_json = content_type in {
        "application/json",
        "application/vnd.asozial.dynload+json",
    }

    if content_type == "application/yaml":
        # timetuple()[2:0:-1] is (day, month)
        if self.now.timetuple()[2:0:-1] != (1, 4):
            return lambda spam: yaml.dump(
                spam,
                width=self.get_int_argument("yaml_width", 80, min_=80),
            )
        # on the 1st of April the JSON serializer is used for YAML
        as_json = True

    if not as_json:
        return lambda spam: spam

    option = ORJSON_OPTIONS
    if self.get_bool_argument("pretty", False):
        option |= json.OPT_INDENT_2
    return lambda spam: json.dumps(spam, option=option)

454 

@override
def finish(  # noqa: D102
    self, chunk: None | str | bytes | dict[Any, Any] = None
) -> Future[None]:
    # rendered HTML can be converted to these content types
    content_type = self.content_type
    convertible = content_type in {
        "application/vnd.asozial.dynload+json",
        "text/plain",
        "text/markdown",
    }

    if not isinstance(chunk, bytes | str):
        return self._finish(chunk)
    if content_type == "text/html" or not self.used_render or not convertible:
        return self._finish(chunk)

    html = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk

    if content_type == "text/markdown":
        return self._finish(
            f"# {self.title}\n\n"
            + html2text.html2text(html, self.request.full_url()).strip()
        )

    soup = BeautifulSoup(html, features="lxml")

    if content_type == "text/plain":
        return self._finish(soup.get_text("\n", True))

    # dynload JSON: the parts of the page needed to display it
    url = self.fix_url(include_protocol_and_host=True)
    short_title = self.short_title if self.title != self.short_title else None
    # the inner HTML of the first <main> element
    body = "".join(
        str(element) for element in soup.find_all(name="main")[0].contents
    ).strip()
    scripts = [
        {"script": script.string} | script.attrs
        for script in soup.find_all("script")
    ]
    stylesheets = [
        stylesheet.get("href").strip()
        for stylesheet in soup.find_all("link", rel="stylesheet")
    ]
    css = "\n".join(style.string for style in soup.find_all("style"))

    dictionary: dict[str, object] = {
        "url": url,
        "title": self.title,
        "short_title": short_title,
        "body": body,
        "scripts": scripts,
        "stylesheets": stylesheets,
        "css": css,
    }
    return self._finish(dictionary)

finish.__doc__ = _RequestHandler.finish.__doc__

508 

def finish_dict(self, **kwargs: Any) -> Future[None]:
    """Finish the request with the keyword arguments as dictionary."""
    dictionary: dict[str, Any] = kwargs
    return self.finish(dictionary)

512 

def fix_url(
    self,
    url: None | str | SplitResult = None,
    new_path: None | str = None,
    include_protocol_and_host: bool | str = False,
    query_args: Mapping[str, None | str | bool | float] | None = None,
) -> str:
    """
    Fix a URL and return it.

    URLs of other websites are returned as they are or, if the user
    wants to be asked before leaving and the redirect module is loaded,
    as a link to the redirect page. URLs of this website get the path
    normalized and the user settings added as query arguments.
    The URL defaults to the URL of the current request.
    """
    args = dict(query_args or {})
    request = self.request
    origin = f"{request.protocol}://{request.host}"

    if url is None:
        url = request.full_url()
    parts = urlsplit(url) if isinstance(url, str) else url

    if parts.netloc and parts.netloc.lower() != request.host.lower():
        # the URL is from another website
        if not (
            self.user_settings.ask_before_leaving
            and self.settings.get("REDIRECT_MODULE_LOADED")
        ):
            return parts.geturl()
        args["to"] = parts.geturl()
        parts = urlsplit(request.full_url())
        path = "/redirect"
    elif new_path is None:
        path = parts.path
    else:
        path = new_path

    # paths are lower case with a single leading and no trailing slash
    path = f"/{path.strip('/')}".lower()
    if path == "/lolwut":
        path = path.upper()

    if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
        # set every option to None for these files
        args.update(dict.fromkeys(self.user_settings.iter_option_names()))
    else:
        current = self.user_settings.as_dict_with_str_values()
        for key, value in current.items():
            args.setdefault(key, value)
        stored = self.user_settings.as_dict_with_str_values(
            include_query_argument=False,
            include_body_argument=request.path == "/einstellungen"
            and self.get_bool_argument("save_in_cookie", False),
        )
        # set arguments to None that equal the value
        # determined without the query arguments
        for key, value in stored.items():
            if value == args[key]:
                args[key] = None

    result = add_args_to_url(
        urlunsplit(
            (request.protocol, request.host, path, parts.query, parts.fragment)
        ),
        **args,
    )

    if include_protocol_and_host:
        return result
    return result.removeprefix(origin)

584 

@classmethod
def get_allowed_methods(cls) -> list[str]:
    """
    Get allowed methods.

    OPTIONS is always allowed; HEAD is allowed if GET is
    allowed and the handler supports HEAD requests.
    """
    allowed = set(cls.ALLOWED_METHODS)
    allowed.add("OPTIONS")
    if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
        allowed.add("HEAD")
    return sorted(allowed)

592 

def get_bool_argument(
    self,
    name: str,
    default: None | bool = None,
) -> bool:
    """
    Get an argument parsed as boolean.

    Without a default the argument is required and
    a 400 HTTPError is raised if it is not a boolean.
    """
    if default is None:
        value = str(self.get_argument(name))
        try:
            return str_to_bool(value)
        except ValueError as err:
            raise HTTPError(400, f"{value} is not a boolean") from err
    return str_to_bool(self.get_argument(name, ""), default)

606 

def get_display_scheme(self) -> ColourScheme:
    """
    Get the scheme currently displayed.

    The "random" scheme is resolved using the lowest bit
    of the microsecond of the request time.
    """
    scheme = self.user_settings.scheme
    if scheme != "random":
        return scheme
    return "dark" if self.now.microsecond & 1 else "light"

613 

def get_display_theme(self) -> str:
    """
    Get the theme currently displayed.

    In December the default theme is replaced with the christmas theme.
    The "random" theme is resolved to a randomly chosen theme that is
    neither "random" nor "christmas".
    """
    theme = self.user_settings.theme

    if theme == "default" and self.now.month == 12:
        return "christmas"

    if theme == "random":
        ignore_themes = ("random", "christmas")
        candidates = tuple(
            name for name in THEMES if name not in ignore_themes
        )
        return random_choice(candidates)  # nosec: B311

    return theme

629 

def get_error_message(self, **kwargs: Any) -> str:
    """
    Get the error message and return it.

    For exceptions that are no HTTPError the traceback is returned if
    the serve_traceback setting is true (debug mode is activated) or
    the request has the TRACEBACK permission, otherwise only the
    exception itself. For a MissingArgumentError the log message is
    returned and in every other case the reason of the response.
    """
    if "exc_info" in kwargs:
        exc_info = kwargs["exc_info"]
        if not issubclass(exc_info[0], HTTPError):
            if self.settings.get("serve_traceback") or self.is_authorized(
                Permission.TRACEBACK
            ):
                lines = traceback.format_exception(*exc_info)
            else:
                lines = traceback.format_exception_only(*exc_info[:2])
            return "".join(lines).strip()
        if issubclass(exc_info[0], MissingArgumentError):
            return cast(str, exc_info[1].log_message)
    return str(self._reason)

654 

def get_error_page_description(self, status_code: int) -> str:
    """
    Get the description for the error page.

    Raise a ValueError if the status code is not between 100 and 599.
    """
    # https://developer.mozilla.org/docs/Web/HTTP/Status
    if not 100 <= status_code <= 599:
        raise ValueError(
            f"{status_code} is not a valid HTTP response status code."
        )

    # the status codes with a description of their own
    if status_code == 404:
        return f"{self.request.path} wurde nicht gefunden."
    if status_code == 451:
        return "Hier wäre bestimmt geiler Scheiß."

    # the generic descriptions of the classes 1xx to 5xx
    return (
        "Hier gibt es eine total wichtige Information.",
        "Hier ist alles super! 🎶🎶",
        "Eine Umleitung ist eingerichtet.",
        "Ein Client-Fehler ist aufgetreten.",
        "Ein Server-Fehler ist aufgetreten.",
    )[status_code // 100 - 1]

676 

def get_int_argument(
    self,
    name: str,
    default: None | int = None,
    *,
    max_: None | int = None,
    min_: None | int = None,
) -> int:
    """
    Get an argument parsed as integer.

    Without a default the argument is required and a 400 HTTPError is
    raised if it is not an integer. With a default, a missing, empty or
    invalid argument yields the default. The result is limited to max_
    first and to min_ afterwards. Prefixes like 0x are supported.
    """
    if default is not None:
        value = default
        if self.get_argument(name, ""):
            # invalid integers are ignored in favour of the default
            with contextlib.suppress(ValueError):
                value = int(self.get_argument(name), base=0)
    else:
        str_value = self.get_argument(name)
        try:
            value = int(str_value, base=0)
        except ValueError as err:
            raise HTTPError(400, f"{str_value} is not an integer") from err

    if max_ is not None:
        value = min(max_, value)
    if min_ is not None:
        value = max(min_, value)

    return value

706 

def get_module_infos(self) -> tuple[ModuleInfo, ...]:
    """
    Get the module infos.

    An empty tuple is returned if the MODULE_INFOS setting is missing or empty.
    """
    module_infos = self.settings.get("MODULE_INFOS")
    return module_infos if module_infos else ()

710 

def get_reporting_api_endpoint(self) -> None | str:
    """
    Get the endpoint for the Reporting API™️.

    Return None if the REPORTING setting is false. Endpoints starting
    with a slash are prefixed with the protocol and host of the request.
    """
    settings = self.settings
    if not settings.get("REPORTING"):
        return None

    endpoint = settings.get("REPORTING_ENDPOINT")
    if endpoint and endpoint.startswith("/"):
        return f"{self.request.protocol}://{self.request.host}{endpoint}"
    return endpoint

721 

@override
def get_template_namespace(self) -> dict[str, Any]:
    """
    Add useful things to the template namespace and return it.

    They are mostly needed by most of the pages (like title,
    description and no_3rd_party). The user settings are added
    as well as helper functions, URLs and date based flags.
    """
    namespace = super().get_template_namespace()
    ansi2html = partial(
        Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
    )
    namespace.update(self.user_settings.as_dict())
    namespace.update(
        # folds the input through the three functions using apply
        ansi2html=partial(
            reduce, apply, (ansi2html, ansi_replace, backspace_replace)
        ),
        apm_script=(
            self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
            if self.apm_enabled
            else None
        ),
        as_html=self.content_type == "text/html",
        # true on the 1st of April or if the cookie "c" is truthy
        c=self.now.date() == date(self.now.year, 4, 1)
        or str_to_bool(self.get_cookie("c", "f") or "f", False),
        canonical_url=self.request.protocol
        + "://"
        # DOMAIN is optional, like in redirect_to_canonical_domain()
        + (self.settings.get("DOMAIN") or self.request.host)
        + self.fix_url(
            self.request.full_url().upper()
            if self.request.path.upper().startswith("/LOLWUT")
            else self.request.full_url().lower()
        )
        .split("?")[0]
        .removesuffix("/"),
        description=self.description,
        display_theme=self.get_display_theme(),
        display_scheme=self.get_display_scheme(),
        elastic_rum_url=self.ELASTIC_RUM_URL,
        fix_static=lambda path: self.fix_url(fix_static_path(path)),
        fix_url=self.fix_url,
        emoji2html=(
            emoji2html
            if self.user_settings.openmoji == "img"
            else (
                (lambda emoji: f'<span class="openmoji">{emoji}</span>')
                if self.user_settings.openmoji
                else (lambda emoji: f"<span>{emoji}</span>")
            )
        ),
        form_appendix=self.user_settings.get_form_appendix(),
        GH_ORG_URL=GH_ORG_URL,
        GH_PAGES_URL=GH_PAGES_URL,
        GH_REPO_URL=GH_REPO_URL,
        keywords="Asoziales Netzwerk, Känguru-Chroniken"
        + (
            f", {self.module_info.get_keywords_as_str(self.request.path)}"
            if self.module_info  # type: ignore[truthy-bool]
            else ""
        ),
        lang="de",  # TODO: add language support
        nonce=self.nonce,
        now=self.now,
        openmoji_version=OPENMOJI_VERSION,
        settings=self.settings,
        short_title=self.short_title,
        testing=pytest_is_running(),
        title=self.title,
    )
    namespace.update(
        {
            # true on easter sunday and the day after
            "🥚": timedelta()
            <= self.now.date() - easter(self.now.year)
            < timedelta(days=2),
            "🦘": is_prime(self.now.microsecond),
        }
    )
    return namespace

800 

def get_user_id(self) -> str:
    """
    Get the user id saved in the cookie or create one.

    The cookie is (re)set for 90 days if no
    cookie younger than 30 days is present.
    """
    stored = self.get_secure_cookie(
        "user_id",
        max_age_days=90,
        min_version=2,
    )

    if stored:
        user_id = stored.decode("UTF-8")
    else:
        user_id = str(uuid.uuid4())

    recent = self.get_secure_cookie("user_id", max_age_days=30, min_version=2)
    if recent:
        return user_id

    # save it in cookie or reset expiry date
    self.set_secure_cookie(
        "user_id",
        user_id,
        expires_days=90,
        path="/",
        samesite="Strict",
    )
    return user_id

823 

def handle_accept_header(  # pylint: disable=inconsistent-return-statements
    self, possible_content_types: tuple[str, ...], strict: bool = True
) -> None:
    """
    Handle the Accept header and set `self.content_type`.

    Nothing happens if there are no possible content types. If none of
    them matches the Accept header, the request is either handled as
    not acceptable (strict) or the first content type is used.
    """
    if not possible_content_types:
        return

    accept = self.request.headers.get("Accept") or "*/*"
    best_match = get_best_match(accept, possible_content_types)

    if best_match is None:
        if strict:
            return self.handle_not_acceptable(possible_content_types)
        best_match = possible_content_types[0]

    self.content_type = best_match
    self.set_content_type_header()

840 

def handle_not_acceptable(
    self, possible_content_types: tuple[str, ...]
) -> None:
    """
    Only call this if we cannot respect the Accept header.

    Finish the request with status 406 and the
    possible content types as body, one per line.
    """
    self.clear_header("Content-Type")
    self.set_status(406)
    body = "\n".join(possible_content_types) + "\n"
    raise Finish(body)

848 

def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
    """
    Handle HEAD requests.

    Call get() with the additional keyword argument head=True.
    Raise a 405 HTTPError if get() is the one of Tornado and a
    501 HTTPError if the handler does not support HEAD requests.
    """
    get_is_unimplemented = self.get.__module__ == "tornado.web"
    if get_is_unimplemented:
        raise HTTPError(405)
    if not self.supports_head():
        raise HTTPError(501)

    return self.get(*args, **(kwargs | {"head": True}))

858 

859 @override 

860 def initialize( 

861 self, 

862 *, 

863 module_info: ModuleInfo, 

864 # default is true, because then empty args dicts are 

865 # enough to specify that the defaults should be used 

866 default_title: bool = True, 

867 default_description: bool = True, 

868 ) -> None: 

869 """ 

870 Get title and description from the kwargs. 

871 

872 If title and description are present in the kwargs, 

873 then they override self.title and self.description. 

874 """ 

875 self.module_info = module_info 

876 if not default_title: 

877 page_info = self.module_info.get_page_info(self.request.path) 

878 self.title = page_info.name 

879 self.short_title = page_info.short_name or self.title 

880 if not default_description: 

881 self.description = self.module_info.get_page_info( 

882 self.request.path 

883 ).description 

884 

885 @override 

886 async def options(self, *args: Any, **kwargs: Any) -> None: 

887 """Handle OPTIONS requests.""" 

888 # pylint: disable=unused-argument 

889 self.set_header("Allow", ", ".join(self.get_allowed_methods())) 

890 self.set_status(204) 

891 await self.finish() 

892 

def origin_trial(self, token: bytes | str) -> bool:
    """
    Enable an experimental feature.

    Return whether the feature of the token is active for this request.
    The token is added as Origin-Trial header if it is not expired and
    its origin matches the URL of the request (or a parent domain of
    it, if the token is valid for subdomains).
    """
    # pylint: disable=protected-access
    # the JSON payload starts after the first 69 bytes of the decoded token
    payload = json.loads(b64decode(token)[69:])
    feature = payload["feature"]
    if feature in self.active_origin_trials:
        return True

    origin = urlsplit(payload["origin"])
    url = urlsplit(self.request.full_url())
    if url.port is None and url.scheme in {"http", "https"}:
        # make the default port explicit before comparing the netloc
        default_port = 443 if url.scheme == "https" else 80
        url = url._replace(netloc=f"{url.hostname}:{default_port}")

    expired = self.request._start_time > payload["expiry"]
    if expired or url.scheme != origin.scheme:
        return False

    if url.netloc != origin.netloc:
        is_subdomain = payload.get("isSubdomain") and url.netloc.endswith(
            f".{origin.netloc}"
        )
        if not is_subdomain:
            return False

    self.add_header("Origin-Trial", token)
    self.active_origin_trials.add(feature)
    return True

917 

@override
async def prepare(self) -> None:
    """
    Prepare the request.

    After the preparation of the base class this disables compression
    if it is not allowed, handles the Accept header, sometimes sets the
    cookie "c" on GET requests and raises a 413 HTTPError if the body
    is larger than MAX_BODY_SIZE.
    """
    await super().prepare()

    if self._finished:
        # the base class already finished the request
        return

    if not self.ALLOW_COMPRESSION:
        gzip_transforms = (
            transform
            for transform in self._transforms
            if isinstance(transform, GZipContentEncoding)
        )
        for transform in gzip_transforms:
            # pylint: disable=protected-access
            transform._gzipping = False

    self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)

    method = self.request.method

    if method == "GET":
        # the random number is seeded with the time of the request
        days = Random(self.now.timestamp()).randint(0, 31337)
        if days in {69, 420, 1337, 31337}:
            self.set_cookie("c", "s", expires_days=days / 24, path="/")

    max_body_size = self.MAX_BODY_SIZE
    if method == "OPTIONS" or max_body_size is None:
        return

    body_size = len(self.request.body)
    if body_size > max_body_size:
        LOGGER.warning(
            "%s > MAX_BODY_SIZE (%s)",
            body_size,
            max_body_size,
        )
        raise HTTPError(413)

955 

@override
def render(  # noqa: D102
    self, template_name: str, **kwargs: Any
) -> Future[None]:
    # remember that the response is a rendered template,
    # finish() only converts those to other content types
    self.used_render = True
    future = super().render(template_name, **kwargs)
    return future

render.__doc__ = _RequestHandler.render.__doc__

964 

def set_content_type_header(self) -> None:
    """
    Set the Content-Type header based on `self.content_type`.

    The charset UTF-8 is added to text types. The header
    is left untouched if the content type is None.
    """
    content_type = self.content_type
    if content_type is None:
        return

    if str(content_type).startswith("text/"):  # RFC 2616 (3.7.1)
        header_value = f"{content_type};charset=utf-8"
    else:
        header_value = content_type
    self.set_header("Content-Type", header_value)

973 

@override
def set_cookie(  # noqa: D102  # pylint: disable=too-many-arguments
    self,
    name: str,
    value: str | bytes,
    domain: None | str = None,
    expires: None | float | tuple[int, ...] | datetime = None,
    path: str = "/",
    expires_days: None | float = 400,  # changed
    *,
    secure: bool | None = None,
    httponly: bool = True,
    **kwargs: Any,
) -> None:
    # default for same site should be strict
    kwargs.setdefault("samesite", "Strict")

    # cookies are secure by default if the request uses HTTPS
    if secure is None:
        secure = self.request.protocol == "https"

    super().set_cookie(
        name,
        value,
        domain,
        expires,
        path,
        expires_days,
        secure=secure,
        httponly=httponly,
        **kwargs,
    )

set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__

1007 

def set_csp_header(self) -> None:
    """Generate a fresh nonce and set the Content-Security-Policy header.

    The nonce is stored in `self.nonce`.  When APM is enabled, the
    configured inline script hash and the RUM/APM server URL are added to
    the script and connect sources.  A ``report-uri`` directive is only
    appended if the ``REPORTING`` setting is truthy.
    """
    self.nonce = secrets.token_urlsafe(16)

    # Only look at the APM configuration if APM is actually enabled.
    apm_settings = self.settings["ELASTIC_APM"] if self.apm_enabled else {}

    script_sources = ["'self'", f"'nonce-{self.nonce}'"]
    if "INLINE_SCRIPT_HASH" in apm_settings:
        script_sources.append(f"'sha256-{apm_settings['INLINE_SCRIPT_HASH']}'")
        # for browsers that don't support hash
        script_sources.append("'unsafe-inline'")

    connect_sources = ["'self'"]
    if "SERVER_URL" in apm_settings:
        rum_server_url = apm_settings.get("RUM_SERVER_URL")
        if rum_server_url is None:
            # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
            connect_sources.append(apm_settings["SERVER_URL"])
        elif rum_server_url:
            # the RUM agent needs to connect to rum_server_url
            connect_sources.append(rum_server_url)
        # any other falsy value (e.g. an empty string) adds nothing

    # fix for older browsers
    websocket_scheme = "wss" if self.request.protocol == "https" else "ws"
    connect_sources.append(websocket_scheme + f"://{self.request.host}")

    directives = [
        "default-src 'self';",
        f"script-src {' '.join(script_sources)};",
        f"connect-src {' '.join(connect_sources)};",
        "style-src 'self' 'unsafe-inline';",
        "img-src 'self' https://img.zeit.de https://github.asozial.org;",
        "frame-ancestors 'self';",
        "sandbox allow-downloads allow-same-origin allow-modals"
        " allow-popups-to-escape-sandbox allow-scripts allow-popups"
        " allow-top-navigation-by-user-activation allow-forms;",
        "report-to default;",
        "base-uri 'none';",
    ]
    if self.settings.get("REPORTING"):
        directives.append(f"report-uri {self.get_reporting_api_endpoint()};")

    self.set_header("Content-Security-Policy", "".join(directives))

1060 

@override
def set_default_headers(self) -> None:
    """Apply the headers every response of this handler starts out with."""
    self.set_csp_header()
    self.active_origin_trials = set()

    # Reporting API / Network Error Logging, only if reporting is configured.
    if self.settings.get("REPORTING"):
        endpoint = self.get_reporting_api_endpoint()
        report_to = {
            "group": "default",
            "max_age": 2592000,
            "endpoints": [{"url": endpoint}],
        }
        self.set_header(
            "Reporting-Endpoints",
            f'default="{endpoint}"',  # noqa: B907
        )
        self.set_header(
            "Report-To", json.dumps(report_to, option=ORJSON_OPTIONS)
        )
        self.set_header("NEL", '{"report_to":"default","max_age":2592000}')

    # Headers with fixed values, set in the same order as before.
    for header_name, header_value in (
        ("X-Content-Type-Options", "nosniff"),
        ("Access-Control-Max-Age", "7200"),
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Headers", "*"),
    ):
        self.set_header(header_name, header_value)

    allowed_methods = ", ".join(self.get_allowed_methods())
    self.set_header("Access-Control-Allow-Methods", allowed_methods)

    for header_name, header_value in (
        ("Cross-Origin-Resource-Policy", "cross-origin"),
        (
            "Permissions-Policy",
            "browsing-topics=(),"
            "identity-credentials-get=(),"
            "join-ad-interest-group=(),"
            "private-state-token-issuance=(),"
            "private-state-token-redemption=(),"
            "run-ad-auction=()",
        ),
        ("Referrer-Policy", "same-origin"),
        ("Cross-Origin-Opener-Policy", "same-origin; report-to=default"),
    ):
        self.set_header(header_name, header_value)

    # TODO: improve this
    embedder_policy = (
        "credentialless; report-to=default"
        if self.request.path == "/kaenguru-comics-alt"
        else "require-corp; report-to=default"
    )
    self.set_header("Cross-Origin-Embedder-Policy", embedder_policy)

    if self.settings.get("HSTS"):
        self.set_header("Strict-Transport-Security", "max-age=63072000")

    # Advertise the onion service unless the request already came via it.
    onion_address = self.settings.get("ONION_ADDRESS")
    if onion_address and not self.request.host_name.endswith(".onion"):
        onion_location = onion_address + self.request.path
        if self.request.query:
            onion_location += f"?{self.request.query}"
        self.set_header("Onion-Location", onion_location)

    if self.settings.get("debug"):
        self.set_header("X-Debug", bool_to_str(True))
        # one header per named permission, telling whether it is granted
        for permission in Permission:
            if not permission.name:
                continue
            granted = bool(self.is_authorized(permission))
            self.set_header(
                f"X-Permission-{permission.name}", bool_to_str(granted)
            )

    self.set_header("Vary", "Accept, Authorization, Cookie")


# Replace the docstring above with the one of the parent implementation.
set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__

1138 

@classmethod
def supports_head(cls) -> bool:
    """Tell whether `get` of this handler can also serve HEAD requests.

    That is the case exactly when `get` declares a keyword-only parameter
    named ``head``.
    """
    head_parameter = inspect.signature(cls.get).parameters.get("head")
    if head_parameter is None:
        return False
    return head_parameter.kind == inspect.Parameter.KEYWORD_ONLY

1148 

@cached_property
def user_settings(self) -> Options:
    """Return the `Options` object for this handler (built once, then cached)."""
    options = Options(self)
    return options

1153 

@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:  # noqa: D102
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    self.set_content_type_header()

    # dicts are serialised by the handler's own dump() first
    if isinstance(chunk, dict):
        chunk = self.dump(chunk)

    now = self.now
    if (now.month, now.day) == (4, 27):
        # Every April 27th, some capitalised words of five or more letters
        # in textual output get replaced with "Stanley".
        lucky_number = now.year % 5

        def replace_word(match: Any) -> Any:
            """Swap the matched word for "Stanley" if its seeded draw hits."""
            word = match[0]
            # seeding with the word makes the decision stable per word
            if Random(word).randrange(5) == lucky_number:
                return "Stanley"
            return word

        if isinstance(chunk, bytes):
            try:
                chunk = chunk.decode("UTF-8")
            except UnicodeDecodeError:
                pass  # not valid UTF-8: pass the bytes on untouched

        if isinstance(chunk, str):
            chunk = regex.sub(
                r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b", replace_word, chunk
            )

    super().write(chunk)


# The override carries no docstring of its own; reuse the parent's.
write.__doc__ = _RequestHandler.write.__doc__

1182 

@override
def write_error(self, status_code: int, **kwargs: Any) -> None:
    """Send the error response in the negotiated content type."""
    dict_content_types: tuple[str, str] = (
        "application/json",
        "application/yaml",
    )
    all_error_content_types: tuple[str, ...] = (
        # text/plain as first (default), to not screw up output in terminals
        "text/plain",
        "text/html",
        "text/markdown",
        *dict_content_types,
        "application/vnd.asozial.dynload+json",
    )

    if self.content_type not in all_error_content_types:
        # don't send 406, instead default with text/plain
        self.handle_accept_header(all_error_content_types, strict=False)

    # read only now, as the negotiation above may have changed it
    content_type = self.content_type

    if content_type == "text/html":
        reason = self.get_error_message(**kwargs)
        description = self.get_error_page_description(status_code)
        # Tracebacks are only shown for non-HTTPError exceptions, and only
        # if enabled in the settings or permitted for this request.
        show_traceback = (
            "exc_info" in kwargs
            and not issubclass(kwargs["exc_info"][0], HTTPError)
            and (
                self.settings.get("serve_traceback")
                or self.is_authorized(Permission.TRACEBACK)
            )
        )
        self.render(  # type: ignore[unused-awaitable]
            "error.html",
            status=status_code,
            reason=reason,
            description=description,
            is_traceback=show_traceback,
        )
        return

    reason = self.get_error_message(**kwargs)

    if content_type in dict_content_types:
        # structured body for the JSON and YAML content types
        self.finish(  # type: ignore[unused-awaitable]
            {
                "status": status_code,
                "reason": reason,
            }
        )
    else:
        # plain one-line fallback for every remaining content type
        self.finish(  # type: ignore[unused-awaitable]
            f"{status_code} {reason}\n"
        )


# Replace the docstring above with the one of the parent implementation.
write_error.__doc__ = _RequestHandler.write_error.__doc__