Coverage for an_website/utils/base_request_handler.py: 78.528%
489 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-07 20:06 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-07 20:06 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
13# pylint: disable=too-many-lines
15"""
16The base request handler used by other modules.
18This should only contain the BaseRequestHandler class.
19"""
21from __future__ import annotations
23import contextlib
24import inspect
25import logging
26import secrets
27import sys
28import traceback
29import uuid
30from asyncio import Future
31from base64 import b64decode
32from collections.abc import Awaitable, Callable, Coroutine
33from contextvars import ContextVar
34from datetime import date, datetime, timedelta, timezone, tzinfo
35from functools import cached_property, partial, reduce
36from random import Random, choice as random_choice
37from types import TracebackType
38from typing import Any, ClassVar, Final, cast, override
39from urllib.parse import SplitResult, urlsplit, urlunsplit
40from zoneinfo import ZoneInfo
42import elasticapm
43import html2text
44import orjson as json
45import regex
46import tornado.web
47import yaml
48from accept_types import get_best_match # type: ignore[import-untyped]
49from ansi2html import Ansi2HTMLConverter
50from bs4 import BeautifulSoup
51from dateutil.easter import easter
52from elastic_transport import ApiError, TransportError
53from elasticsearch import AsyncElasticsearch
54from openmoji_dist import VERSION as OPENMOJI_VERSION
55from redis.asyncio import Redis
56from tornado.httputil import HTTPServerRequest
57from tornado.iostream import StreamClosedError
58from tornado.web import (
59 Finish,
60 GZipContentEncoding,
61 HTTPError,
62 MissingArgumentError,
63 OutputTransform,
64)
66from .. import (
67 EVENT_ELASTICSEARCH,
68 EVENT_REDIS,
69 GH_ORG_URL,
70 GH_PAGES_URL,
71 GH_REPO_URL,
72 NAME,
73 ORJSON_OPTIONS,
74 pytest_is_running,
75)
76from .decorators import is_authorized
77from .options import ColourScheme, Options
78from .static_file_handling import FILE_HASHES_DICT, fix_static_path
79from .themes import THEMES
80from .utils import (
81 ModuleInfo,
82 Permission,
83 add_args_to_url,
84 ansi_replace,
85 apply,
86 backspace_replace,
87 bool_to_str,
88 emoji2html,
89 geoip,
90 hash_bytes,
91 is_prime,
92 ratelimit,
93 str_to_bool,
94)
# Logger for this module.
96LOGGER: Final = logging.getLogger(__name__)
# Content types outside of "text/*" that BaseRequestHandler._finish() also
# treats as text when deciding whether to append a trailing newline.
98TEXT_CONTENT_TYPES: Final[set[str]] = {
99 "application/javascript",
100 "application/json",
101 "application/vnd.asozial.dynload+json",
102 "application/x-ndjson",
103 "application/xml",
104 "application/yaml",
105}
# Context variable for the request; _RequestHandler._execute() stores the
# handler's request in it before delegating to Tornado.
107request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")
110class _RequestHandler(tornado.web.RequestHandler):
111 """Base for Tornado request handlers."""
 # Set by prepare(): True when the CRAWLER_SECRET setting occurs in the
 # User-Agent header. ratelimit() does not limit such requests.
113 crawler: bool = False
@override
async def _execute(
    self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
) -> None:
    # Store the request in the context variable first, then let Tornado
    # run the request as usual.
    request_ctx_var.set(self.request)
    parent = super()
    return await parent._execute(transforms, *args, **kwargs)


# reuse the documentation of the overridden Tornado method
# pylint: disable-next=protected-access
_execute.__doc__ = tornado.web.RequestHandler._execute.__doc__
@property
def apm_client(self) -> None | elasticapm.Client:
    """Return the "CLIENT" entry of the ELASTIC_APM settings, if any."""
    apm_settings = self.settings.get("ELASTIC_APM", {})
    return apm_settings.get("CLIENT")  # type: ignore[no-any-return]
@property
def apm_enabled(self) -> bool:
    """Tell whether the ELASTIC_APM settings have a truthy "ENABLED" entry."""
    apm_settings = self.settings.get("ELASTIC_APM", {})
    return True if apm_settings.get("ENABLED") else False
@override
def data_received(  # noqa: D102
    self, chunk: bytes
) -> None | Awaitable[None]:
    # No-op: the chunk is not processed here.
    return None


# reuse the documentation of the overridden Tornado method
data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__
@property
def elasticsearch(self) -> AsyncElasticsearch:
    """
    Return the "ELASTICSEARCH" entry of the settings.

    The value is None if Elasticsearch is not enabled, even though the
    return type is declared as a client.
    """
    client = self.settings.get("ELASTICSEARCH")
    return cast(AsyncElasticsearch, client)
@property
def elasticsearch_prefix(self) -> str:
    """Return the Elasticsearch prefix from the settings (default: NAME)."""
    prefix = self.settings.get("ELASTICSEARCH_PREFIX", NAME)
    return prefix  # type: ignore[no-any-return]
def geoip(
    self,
    ip: None | str = None,
    database: str = geoip.__defaults__[0],  # type: ignore[index]
    *,
    allow_fallback: bool = True,
) -> Coroutine[None, None, None | dict[str, Any]]:
    """
    Start a GeoIP lookup and return its coroutine.

    Without an IP the address of the client is looked up. Elasticsearch
    is only passed to the lookup while EVENT_ELASTICSEARCH is set;
    otherwise the lookup is called with just the IP and the database.
    """
    address = ip or self.request.remote_ip
    if EVENT_ELASTICSEARCH.is_set():
        return geoip(
            address,
            database,
            self.elasticsearch,
            allow_fallback=allow_fallback,
        )
    return geoip(address, database)
async def get_time(self) -> datetime:
    """
    Get the start time of the request in the user's timezone.

    The timezone comes from the GeoIP lookup of the client. UTC is used
    when the Elasticsearch request fails, when the lookup yields no
    timezone, or when the reported timezone cannot be loaded.
    """
    tz: tzinfo = timezone.utc
    try:
        geoip_info = await self.geoip()
    except (ApiError, TransportError):
        LOGGER.exception("Elasticsearch request failed")
        if self.apm_client:
            self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
    else:
        tz_name = geoip_info.get("timezone") if geoip_info else None
        if tz_name:
            try:
                tz = ZoneInfo(tz_name)
            except (KeyError, ValueError):
                # ZoneInfoNotFoundError is a KeyError subclass and malformed
                # keys raise ValueError. A timezone missing from the local
                # tz database must not break the whole request.
                LOGGER.warning(
                    "Unknown timezone %r from GeoIP, using UTC", tz_name
                )
    return datetime.fromtimestamp(
        self.request._start_time, tz=tz  # pylint: disable=protected-access
    )
def is_authorized(
    self, permission: Permission, allow_cookie_auth: bool = True
) -> bool | None:
    """Check whether this request holds the given permission."""
    # Inside the method body the name refers to the imported helper
    # function, not to this method.
    verdict = is_authorized(self, permission, allow_cookie_auth)
    return verdict
@override
def log_exception(
    self,
    typ: None | type[BaseException],
    value: None | BaseException,
    tb: None | TracebackType,
) -> None:
    # HTTP errors keep Tornado's own logging.
    if isinstance(value, HTTPError):
        super().log_exception(typ, value, tb)
        return

    # A closed stream is only worth a debug message; everything else is
    # logged as an error.
    if typ is StreamClosedError:
        log, message = LOGGER.debug, "Stream closed %s"
    else:
        log, message = LOGGER.error, "Uncaught exception %s"
    log(
        message,
        self._request_summary(),
        exc_info=(typ, value, tb),  # type: ignore[arg-type]
    )


# reuse the documentation of the overridden Tornado method
log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__
@cached_property
def now(self) -> datetime:
    """
    Get the start time of the request in UTC.

    This is only the fallback for reading `now` before prepare() has
    assigned it. Under pytest that is treated as a bug and raises.
    """
    # pylint: disable=method-hidden
    if pytest_is_running():
        raise AssertionError("Now accessed before it was set")
    method_is_supported = self.request.method in self.SUPPORTED_METHODS
    if method_is_supported:
        LOGGER.error("Now accessed before it was set", stacklevel=3)
    start_time = self.request._start_time  # pylint: disable=protected-access
    return datetime.fromtimestamp(start_time, tz=timezone.utc)
234 @override
235 async def prepare(self) -> None:
236 """Check authorization and call self.ratelimit()."""
237 # pylint: disable=invalid-overridden-method
238 self.now = await self.get_time()
240 if crawler_secret := self.settings.get("CRAWLER_SECRET"):
241 self.crawler = crawler_secret in self.request.headers.get(
242 "User-Agent", ""
243 )
245 if (
246 self.request.method in {"GET", "HEAD"}
247 and self.redirect_to_canonical_domain()
248 ):
249 return
251 if self.request.method != "OPTIONS" and not await self.ratelimit(True):
252 await self.ratelimit()
async def ratelimit(self, global_ratelimit: bool = False) -> bool:
    """
    Take b1nzy to space using Redis.

    Applies the global ratelimit if `global_ratelimit` is true, otherwise
    the one configured through the RATELIMIT_<METHOD>_* attributes of the
    handler. Returns whether the request got ratelimited; in that case
    the error response has already been written.
    """
    if not self.settings.get("RATELIMITS") or self.request.method == "OPTIONS":
        return False
    if self.is_authorized(Permission.RATELIMITS) or self.crawler:
        return False

    if not EVENT_REDIS.is_set():
        LOGGER.warning(
            (
                "Ratelimits are enabled, but Redis is not available. "
                "This can happen shortly after starting the website."
            ),
        )
        raise HTTPError(503)

    limits: dict[str, Any]
    if global_ratelimit:
        limits = {
            "bucket": None,
            "max_burst": 99,  # limit = 100
            "count_per_period": 20,  # 20 requests per second
            "period": 1,
            "tokens": 10 if self.settings.get("UNDER_ATTACK") else 1,
        }
    else:
        # HEAD shares the configuration of GET
        method = "GET" if self.request.method == "HEAD" else self.request.method
        limit = getattr(self, f"RATELIMIT_{method}_LIMIT", 0)
        if not limit:
            return False
        limits = {
            "bucket": getattr(
                self,
                f"RATELIMIT_{method}_BUCKET",
                self.__class__.__name__.lower(),
            ),
            "max_burst": limit - 1,
            # request count per period
            "count_per_period": getattr(
                self, f"RATELIMIT_{method}_COUNT_PER_PERIOD", 30
            ),
            # period in seconds
            "period": getattr(self, f"RATELIMIT_{method}_PERIOD", 60),
            # HEAD requests take no tokens
            "tokens": 1 if self.request.method != "HEAD" else 0,
        }

    ratelimited, headers = await ratelimit(
        self.redis,
        self.redis_prefix,
        str(self.request.remote_ip),
        **limits,
    )

    for header, value in headers.items():
        self.set_header(header, value)

    if ratelimited:
        # 420 instead of 429 on April 20th
        on_420 = self.now.date() == date(self.now.year, 4, 20)
        status = 420 if on_420 else 429
        self.set_status(status)
        self.write_error(status)

    return ratelimited
def redirect_to_canonical_domain(self) -> bool:
    """
    Redirect to the canonical domain.

    Returns True if a permanent redirect was sent. Nothing happens when
    no DOMAIN is configured, the request has no Host header, the host is
    already the canonical one, the host is an .onion or .i2p address, or
    the path consists only of Braille pattern characters.

    The port of the Host header is kept in the redirect target. A port
    that cannot be parsed (for example one that is out of range) is
    dropped instead of failing the request.
    """
    domain = self.settings.get("DOMAIN")
    if not domain:
        return False
    host_header = self.request.headers.get("Host")
    if not host_header:
        return False
    host_name = self.request.host_name
    if host_name == domain or host_name.endswith((".onion", ".i2p")):
        return False
    if regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path):
        return False

    host = urlsplit(f"//{host_header}")
    try:
        port = host.port
    except ValueError:
        # The Host header is client-controlled; reading .port raises for
        # invalid ports, so redirect without a port in that case.
        port = None

    target = urlsplit(self.request.full_url())._replace(
        netloc=f"{domain}:{port}" if port else domain
    )
    self.redirect(target.geturl(), permanent=True)
    return True
@property
def redis(self) -> Redis[str]:
    """
    Return the "REDIS" entry of the settings.

    The value is None if Redis is not enabled, even though the return
    type is declared as a client.
    """
    client = self.settings.get("REDIS")
    return cast("Redis[str]", client)
@property
def redis_prefix(self) -> str:
    """Return the Redis prefix from the settings (default: NAME)."""
    prefix = self.settings.get("REDIS_PREFIX", NAME)
    return prefix  # type: ignore[no-any-return]
360class BaseRequestHandler(_RequestHandler):
361 """The base request handler used by every page and API."""
363 # pylint: disable=too-many-instance-attributes, too-many-public-methods
 # URL of the Elastic APM RUM script; the ".min" build is used unless
 # Python runs in development mode.
365 ELASTIC_RUM_URL: ClassVar[str] = (
366 f"/@apm-rum/elastic-apm-rum.umd{'' if sys.flags.dev_mode else '.min'}.js"
367 "?v=5.12.0"
368 )
 # compute_etag() returns None when this is False
370 COMPUTE_ETAG: ClassVar[bool] = True
 # prepare() switches gzip off for the response when this is False
371 ALLOW_COMPRESSION: ClassVar[bool] = True
 # prepare() raises HTTPError(413) for longer bodies; None means no limit
372 MAX_BODY_SIZE: ClassVar[None | int] = None
 # used by get_allowed_methods(), which always adds OPTIONS
373 ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
 # passed to handle_accept_header() in prepare()
374 POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()
 # assigned in initialize()
376 module_info: ModuleInfo
377 # info about page, can be overridden in module_info
378 title: str = "Das Asoziale Netzwerk"
379 short_title: str = "Asoziales Netzwerk"
380 description: str = "Die tolle Webseite des Asozialen Netzwerks"
 # set to True by render(); finish() only converts rendered output
382 used_render: bool = False
 # reset in set_default_headers() and filled by origin_trial()
384 active_origin_trials: set[str]
 # set by handle_accept_header()
385 content_type: None | str = None
386 apm_script: None | str
 # generated in set_csp_header()
387 nonce: str
def _finish(
    self, chunk: None | str | bytes | dict[str, Any] = None
) -> Future[None]:
    """
    Write the chunk and finish the response.

    Responses with a text-like content type get a trailing newline if the
    buffered output does not already end with one.
    """
    if self._finished:
        raise RuntimeError("finish() called twice")

    if chunk is not None:
        self.write(chunk)

    content_type = self.content_type
    if content_type and (
        content_type in TEXT_CONTENT_TYPES
        or content_type.startswith("text/")
        or content_type.endswith(("+xml", "+json"))
    ):
        buffer = self._write_buffer
        if buffer and not buffer[-1].endswith(b"\n"):
            self.write(b"\n")

    return super().finish()
412 @override
413 def compute_etag(self) -> None | str:
414 """Compute ETag with Base85 encoding."""
415 if not self.COMPUTE_ETAG:
416 return None
417 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907
419 @override
420 def decode_argument( # noqa: D102
421 self, value: bytes, name: str | None = None
422 ) -> str:
423 try:
424 return value.decode("UTF-8", "replace")
425 except UnicodeDecodeError as exc:
426 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}"
427 LOGGER.exception(err_msg, exc_info=exc)
428 raise HTTPError(400, err_msg) from exc
@property
def dump(self) -> Callable[[Any], str | bytes]:
    """
    Get the function for dumping the output.

    JSON content types are dumped with orjson, YAML with yaml.dump and
    everything else is passed through unchanged. On April 1st YAML
    responses are dumped as JSON as well.
    """
    content_type = self.content_type
    as_json = content_type in {
        "application/json",
        "application/vnd.asozial.dynload+json",
    }

    if content_type == "application/yaml":
        now = self.now
        if (now.day, now.month) == (1, 4):
            as_json = True
        else:
            # yaml_width is read when the returned function is called
            return lambda spam: yaml.dump(
                spam,
                width=self.get_int_argument("yaml_width", 80, min_=80),
            )

    if not as_json:
        return lambda spam: spam

    option = ORJSON_OPTIONS
    if self.get_bool_argument("pretty", False):
        option |= json.OPT_INDENT_2
    return lambda spam: json.dumps(spam, option=option)
@override
def finish(  # noqa: D102
    self, chunk: None | str | bytes | dict[Any, Any] = None
) -> Future[None]:
    # Output that went through render() is converted when the content type
    # is Markdown, plain text or the dynload JSON format. Everything else
    # is finished unchanged.
    content_type = self.content_type
    as_json = content_type == "application/vnd.asozial.dynload+json"
    as_plain_text = content_type == "text/plain"
    as_markdown = content_type == "text/markdown"

    if not isinstance(chunk, bytes | str):
        return self._finish(chunk)
    if (
        content_type == "text/html"
        or not self.used_render
        or not (as_json or as_plain_text or as_markdown)
    ):
        return self._finish(chunk)

    html = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk

    if as_markdown:
        markdown = html2text.html2text(html, self.request.full_url()).strip()
        return self._finish(f"# {self.title}\n\n" + markdown)

    soup = BeautifulSoup(html, features="lxml")

    if as_plain_text:
        return self._finish(soup.get_text("\n", True))

    # dynload JSON: the contents of <main> plus the scripts, stylesheets
    # and inline styles found in the rendered page
    main_element = soup.find_all(name="main")[0]
    dictionary: dict[str, object] = {
        "url": self.fix_url(),
        "title": self.title,
        "short_title": (
            None if self.title == self.short_title else self.short_title
        ),
        "body": "".join(map(str, main_element.contents)).strip(),
        "scripts": [
            {"script": script.string} | script.attrs
            for script in soup.find_all("script")
        ],
        "stylesheets": [
            stylesheet.get("href").strip()
            for stylesheet in soup.find_all("link", rel="stylesheet")
        ],
        "css": "\n".join(style.string for style in soup.find_all("style")),
    }

    return self._finish(dictionary)


# reuse the documentation of the overridden method
finish.__doc__ = _RequestHandler.finish.__doc__
def finish_dict(self, **kwargs: Any) -> Future[None]:
    """Finish the request with the keyword arguments as a dictionary."""
    payload = kwargs
    return self.finish(payload)
513 def fix_url(
514 self,
515 url: None | str | SplitResult = None,
516 new_path: None | str = None,
517 **query_args: None | str | bool | float,
518 ) -> str:
519 """
520 Fix a URL and return it.
522 If the URL is from another website, link to it with the redirect page,
523 otherwise just return the URL with no_3rd_party appended.
524 """
 # Without a URL the URL of the current request is fixed.
525 if url is None:
526 url = self.request.full_url()
527 if isinstance(url, str):
528 url = urlsplit(url)
 # A URL with a host different from the request's host is external.
529 if url.netloc and url.netloc.lower() != self.request.host.lower():
 # External URLs are returned unchanged if the user does not want to be
 # asked before leaving or the redirect module is not loaded.
530 if (
531 not self.user_settings.ask_before_leaving
532 or not self.settings.get("REDIRECT_MODULE_LOADED")
533 ):
534 return url.geturl()
 # Otherwise link to the redirect page with the target in "to".
535 path = "/redirect"
536 query_args["to"] = url.geturl()
537 url = urlsplit(self.request.full_url())
538 else:
539 path = url.path if new_path is None else new_path
 # Normalize the path: one leading slash, no trailing slash, lower case
 # (except for /LOLWUT, which is upper case).
540 path = f"/{path.strip('/')}".lower()
541 if path == "/lolwut":
542 path = path.upper()
 # Soundboard files and paths in FILE_HASHES_DICT get every option name
 # set to None in the query arguments.
543 if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
544 query_args.update(
545 dict.fromkeys(self.user_settings.iter_option_names())
546 )
547 else:
 # Fill in the user settings without overriding explicit arguments.
548 for (
549 key,
550 value,
551 ) in self.user_settings.as_dict_with_str_values().items():
552 query_args.setdefault(key, value)
 # Set arguments to None when they equal the value computed without query
 # arguments (body arguments only count while saving in /einstellungen).
553 for key, value in self.user_settings.as_dict_with_str_values(
554 include_query_argument=False,
555 include_body_argument=self.request.path == "/einstellungen"
556 and self.get_bool_argument("save_in_cookie", False),
557 ).items():
558 if value == query_args[key]:
559 query_args[key] = None
 # Rebuild the URL on the protocol and host of the current request; the
 # path "/" is emitted as an empty path.
561 return add_args_to_url(
562 urlunsplit(
563 (
564 self.request.protocol,
565 self.request.host,
566 "" if path == "/" else path,
567 url.query,
568 url.fragment,
569 )
570 ),
571 **query_args,
572 )
@classmethod
def get_allowed_methods(cls) -> list[str]:
    """
    Get the sorted list of allowed methods.

    OPTIONS is always allowed. HEAD is added when GET is allowed and the
    handler supports HEAD requests.
    """
    allowed = set(cls.ALLOWED_METHODS)
    allowed.add("OPTIONS")
    if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
        allowed.add("HEAD")
    return sorted(allowed)
def get_bool_argument(
    self,
    name: str,
    default: None | bool = None,
) -> bool:
    """
    Get an argument parsed as boolean.

    Without a default the argument is required and a value that is not a
    boolean results in a 400 error. With a default, the default is handed
    to str_to_bool() together with the (possibly empty) argument.
    """
    if default is None:
        value = str(self.get_argument(name))
        try:
            return str_to_bool(value)
        except ValueError as err:
            raise HTTPError(400, f"{value} is not a boolean") from err
    return str_to_bool(self.get_argument(name, ""), default)
def get_display_scheme(self) -> ColourScheme:
    """
    Get the scheme currently displayed.

    The "random" scheme becomes "dark" for an odd and "light" for an even
    microsecond of the request time.
    """
    scheme = self.user_settings.scheme
    if scheme != "random":
        return scheme
    return "dark" if self.now.microsecond & 1 else "light"
def get_display_theme(self) -> str:
    """
    Get the theme currently displayed.

    The default theme becomes "christmas" in December. The "random" theme
    picks any theme except "random" and "christmas".
    """
    theme = self.user_settings.theme

    if theme == "default" and self.now.month == 12:
        return "christmas"

    if theme == "random":
        excluded = ("random", "christmas")
        candidates = tuple(name for name in THEMES if name not in excluded)
        return random_choice(candidates)  # nosec: B311

    return theme
def get_error_message(self, **kwargs: Any) -> str:
    """
    Get the error message and return it.

    For exceptions that are not HTTP errors the full traceback is returned
    if the serve_traceback setting is true or the request has the
    TRACEBACK permission; otherwise only the exception itself is returned.
    A missing argument is reported with its log message and everything
    else with the reason of the response.
    """
    if "exc_info" not in kwargs:
        return str(self._reason)

    exc_info = kwargs["exc_info"]
    exc_type = exc_info[0]

    if not issubclass(exc_type, HTTPError):
        if self.settings.get("serve_traceback") or self.is_authorized(
            Permission.TRACEBACK
        ):
            lines = traceback.format_exception(*exc_info)
        else:
            lines = traceback.format_exception_only(*exc_info[:2])
        return "".join(lines).strip()

    if issubclass(exc_type, MissingArgumentError):
        return cast(str, exc_info[1].log_message)

    return str(self._reason)
def get_error_page_description(self, status_code: int) -> str:
    """
    Get the description for the error page.

    Raises a ValueError for status codes outside of 100 to 599.
    """
    # https://developer.mozilla.org/docs/Web/HTTP/Status
    # status codes with a description of their own
    if status_code == 404:
        return f"{self.request.path} wurde nicht gefunden."
    if status_code == 451:
        return "Hier wäre bestimmt geiler Scheiß."

    # one generic description per class of status codes
    descriptions = (
        (100, "Hier gibt es eine total wichtige Information."),
        (200, "Hier ist alles super! 🎶🎶"),
        (300, "Eine Umleitung ist eingerichtet."),
        (400, "Ein Client-Fehler ist aufgetreten."),
        (500, "Ein Server-Fehler ist aufgetreten."),
    )
    for first_code, description in descriptions:
        if first_code <= status_code <= first_code + 99:
            return description

    raise ValueError(
        f"{status_code} is not a valid HTTP response status code."
    )
666 def get_int_argument(
667 self,
668 name: str,
669 default: None | int = None,
670 *,
671 max_: None | int = None,
672 min_: None | int = None,
673 ) -> int:
674 """Get an argument parsed as integer."""
675 if default is None:
676 str_value = self.get_argument(name)
677 try:
678 value = int(str_value, base=0)
679 except ValueError as err:
680 raise HTTPError(400, f"{str_value} is not an integer") from err
681 elif self.get_argument(name, ""):
682 try:
683 value = int(self.get_argument(name), base=0)
684 except ValueError:
685 value = default
686 else:
687 value = default
689 if max_ is not None:
690 value = min(max_, value)
691 if min_ is not None:
692 value = max(min_, value)
694 return value
def get_module_infos(self) -> tuple[ModuleInfo, ...]:
    """Get the module infos from the settings (empty tuple if unset)."""
    module_infos = self.settings.get("MODULE_INFOS")
    return module_infos if module_infos else ()
def get_reporting_api_endpoint(self) -> None | str:
    """
    Get the endpoint for the Reporting API.

    Returns None if reporting is disabled. An endpoint starting with "/"
    is made absolute with the protocol and host of the request; any other
    value of the REPORTING_ENDPOINT setting is returned unchanged.
    """
    if not self.settings.get("REPORTING"):
        return None

    endpoint = self.settings.get("REPORTING_ENDPOINT")
    if endpoint and endpoint.startswith("/"):
        origin = f"{self.request.protocol}://{self.request.host}"
        return f"{origin}{endpoint}"

    return endpoint
711 @override
712 def get_template_namespace(self) -> dict[str, Any]:
713 """
714 Add useful things to the template namespace and return it.
716 They are mostly needed by most of the pages (like title,
717 description and no_3rd_party).
718 """
719 namespace = super().get_template_namespace()
 # Converter for ANSI escape sequences (inline=True, scheme="xterm"),
 # always called with full=False.
720 ansi2html = partial(
721 Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
722 )
 # Every user setting is available in the templates.
723 namespace.update(self.user_settings.as_dict())
724 namespace.update(
 # folds apply() over the three functions, starting with the converter
725 ansi2html=partial(
726 reduce, apply, (ansi2html, ansi_replace, backspace_replace)
727 ),
 # inline APM script, only when APM is enabled
728 apm_script=(
729 self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
730 if self.apm_enabled
731 else None
732 ),
733 as_html=self.content_type == "text/html",
 # true on April 1st or when the "c" cookie holds a true value
734 c=self.now.date() == date(self.now.year, 4, 1)
735 or str_to_bool(self.get_cookie("c", "f") or "f", False),
 # fixed URL of the request without the query string; upper case for
 # paths starting with /LOLWUT, lower case otherwise
736 canonical_url=self.fix_url(
737 self.request.full_url().upper()
738 if self.request.path.upper().startswith("/LOLWUT")
739 else self.request.full_url().lower()
740 ).split("?")[0],
741 description=self.description,
742 display_theme=self.get_display_theme(),
743 display_scheme=self.get_display_scheme(),
744 elastic_rum_url=self.ELASTIC_RUM_URL,
745 fix_static=lambda path: self.fix_url(fix_static_path(path)),
746 fix_url=self.fix_url,
 # how emoji are rendered depends on the "openmoji" user setting
747 emoji2html=(
748 emoji2html
749 if self.user_settings.openmoji == "img"
750 else (
751 (lambda emoji: f'<span class="openmoji">{emoji}</span>')
752 if self.user_settings.openmoji
753 else (lambda emoji: f"<span>{emoji}</span>")
754 )
755 ),
756 form_appendix=self.user_settings.get_form_appendix(),
757 GH_ORG_URL=GH_ORG_URL,
758 GH_PAGES_URL=GH_PAGES_URL,
759 GH_REPO_URL=GH_REPO_URL,
 # base keywords plus the keywords of the module for this path
760 keywords="Asoziales Netzwerk, Känguru-Chroniken"
761 + (
762 f", {self.module_info.get_keywords_as_str(self.request.path)}"
763 if self.module_info # type: ignore[truthy-bool]
764 else ""
765 ),
766 lang="de", # TODO: add language support
767 nonce=self.nonce,
768 now=self.now,
769 openmoji_version=OPENMOJI_VERSION,
770 settings=self.settings,
771 short_title=self.short_title,
772 testing=pytest_is_running(),
773 title=self.title,
774 )
 # These names are not valid identifiers, so they are added with a dict.
775 namespace.update(
776 {
 # true on the date returned by easter() and on the day after it
777 "🥚": timedelta()
778 <= self.now.date() - easter(self.now.year)
779 < timedelta(days=2),
 # true when the microsecond of the request time is prime
780 "🦘": is_prime(self.now.microsecond),
781 }
782 )
783 return namespace
def get_user_id(self) -> str:
    """
    Get the user id saved in the cookie or create one.

    A cookie is accepted for 90 days. It is written again, which resets
    its expiry date, when there is no cookie younger than 30 days.
    """
    stored = self.get_secure_cookie(
        "user_id",
        max_age_days=90,
        min_version=2,
    )
    user_id = stored.decode("UTF-8") if stored else str(uuid.uuid4())

    # save it in cookie or reset expiry date
    is_fresh = self.get_secure_cookie(
        "user_id", max_age_days=30, min_version=2
    )
    if not is_fresh:
        self.set_secure_cookie(
            "user_id",
            user_id,
            expires_days=90,
            path="/",
            samesite="Strict",
        )

    return user_id
def handle_accept_header(  # pylint: disable=inconsistent-return-statements
    self, possible_content_types: tuple[str, ...], strict: bool = True
) -> None:
    """
    Handle the Accept header and set `self.content_type`.

    Nothing happens without possible content types. If none of them
    matches the Accept header, strict mode answers with "not acceptable"
    and non-strict mode falls back to the first possible content type.
    """
    if not possible_content_types:
        return

    accept = self.request.headers.get("Accept") or "*/*"
    content_type = get_best_match(accept, possible_content_types)

    if content_type is None:
        if strict:
            return self.handle_not_acceptable(possible_content_types)
        content_type = possible_content_types[0]

    self.content_type = content_type
    self.set_content_type_header()
def handle_not_acceptable(
    self, possible_content_types: tuple[str, ...]
) -> None:
    """
    Only call this if we cannot respect the Accept header.

    Finishes the request with status 406 and one possible content type
    per line as the body.
    """
    self.clear_header("Content-Type")
    self.set_status(406)
    body = "\n".join(possible_content_types) + "\n"
    raise Finish(body)
def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
    """
    Handle HEAD requests by calling get() with head=True.

    Raises 405 if get() is still the one from tornado.web and 501 if the
    handler's get() does not support HEAD requests.
    """
    get_is_from_tornado = self.get.__module__ == "tornado.web"
    if get_is_from_tornado:
        raise HTTPError(405)
    if not self.supports_head():
        raise HTTPError(501)

    return self.get(*args, **{**kwargs, "head": True})
843 @override
844 def initialize(
845 self,
846 *,
847 module_info: ModuleInfo,
848 # default is true, because then empty args dicts are
849 # enough to specify that the defaults should be used
850 default_title: bool = True,
851 default_description: bool = True,
852 ) -> None:
853 """
854 Get title and description from the kwargs.
856 If title and description are present in the kwargs,
857 then they override self.title and self.description.
858 """
859 self.module_info = module_info
860 if not default_title:
861 page_info = self.module_info.get_page_info(self.request.path)
862 self.title = page_info.name
863 self.short_title = page_info.short_name or self.title
864 if not default_description:
865 self.description = self.module_info.get_page_info(
866 self.request.path
867 ).description
869 @override
870 async def options(self, *args: Any, **kwargs: Any) -> None:
871 """Handle OPTIONS requests."""
872 # pylint: disable=unused-argument
873 self.set_header("Allow", ", ".join(self.get_allowed_methods()))
874 self.set_status(204)
875 await self.finish()
def origin_trial(self, token: bytes | str) -> bool:
    """
    Enable an experimental feature.

    The token is sent in the Origin-Trial header if it has not expired
    and was issued for the origin of this request (or for a parent domain
    if the token allows subdomains). Returns whether the feature is
    active for this response.
    """
    # pylint: disable=protected-access
    # the JSON payload starts after the first 69 bytes of the decoded token
    payload = json.loads(b64decode(token)[69:])
    feature = payload["feature"]
    if feature in self.active_origin_trials:
        return True

    origin = urlsplit(payload["origin"])
    url = urlsplit(self.request.full_url())
    if url.port is None and url.scheme in {"http", "https"}:
        # add the default port of the scheme before comparing netlocs
        default_port = 443 if url.scheme == "https" else 80
        url = url._replace(netloc=f"{url.hostname}:{default_port}")

    if self.request._start_time > payload["expiry"]:
        return False
    if url.scheme != origin.scheme:
        return False
    if url.netloc != origin.netloc:
        is_subdomain = payload.get("isSubdomain") and url.netloc.endswith(
            f".{origin.netloc}"
        )
        if not is_subdomain:
            return False

    self.add_header("Origin-Trial", token)
    self.active_origin_trials.add(feature)
    return True
@override
async def prepare(self) -> None:
    """
    Prepare the request.

    After the preparation of the parent class this disables compression
    if the handler does not allow it, handles the Accept header, rarely
    sets the "c" cookie on GET requests and rejects bodies that are
    larger than MAX_BODY_SIZE with a 413 error.
    """
    await super().prepare()

    if self._finished:
        return

    if not self.ALLOW_COMPRESSION:
        gzip_transforms = (
            transform
            for transform in self._transforms
            if isinstance(transform, GZipContentEncoding)
        )
        for transform in gzip_transforms:
            transform._gzipping = False  # pylint: disable=protected-access

    self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)

    if self.request.method == "GET":
        # random number seeded with the request time
        days = Random(self.now.timestamp()).randint(0, 31337)
        if days in {69, 420, 1337, 31337}:
            self.set_cookie("c", "s", expires_days=days / 24, path="/")

    body_limit = self.MAX_BODY_SIZE
    if self.request.method != "OPTIONS" and body_limit is not None:
        body_size = len(self.request.body)
        if body_size > body_limit:
            LOGGER.warning(
                "%s > MAX_BODY_SIZE (%s)",
                body_size,
                body_limit,
            )
            raise HTTPError(413)
@override
def render(  # noqa: D102
    self, template_name: str, **kwargs: Any
) -> Future[None]:
    # Remember that a template was rendered; finish() only converts the
    # output to other content types when this flag is set.
    self.used_render = True
    future = super().render(template_name, **kwargs)
    return future


# reuse the documentation of the overridden method
render.__doc__ = _RequestHandler.render.__doc__
def set_content_type_header(self) -> None:
    """
    Set the Content-Type header based on `self.content_type`.

    Text types get the UTF-8 charset appended. Without a content type the
    header is left untouched.
    """
    content_type = self.content_type
    if content_type is None:
        return
    if str(content_type).startswith("text/"):  # RFC 2616 (3.7.1)
        header_value = f"{content_type};charset=utf-8"
    else:
        header_value = content_type
    self.set_header("Content-Type", header_value)
@override
def set_cookie(  # noqa: D102  # pylint: disable=too-many-arguments
    self,
    name: str,
    value: str | bytes,
    domain: None | str = None,
    expires: None | float | tuple[int, ...] | datetime = None,
    path: str = "/",
    expires_days: None | float = 400,  # changed
    *,
    secure: bool | None = None,
    httponly: bool = True,
    **kwargs: Any,
) -> None:
    # Differences to the overridden method: cookies expire after 400 days
    # by default, SameSite defaults to "Strict" and, unless given, secure
    # is derived from the protocol of the request.
    kwargs.setdefault("samesite", "Strict")

    if secure is None:
        secure = self.request.protocol == "https"

    super().set_cookie(
        name,
        value,
        domain,
        expires,
        path,
        expires_days,
        secure=secure,
        httponly=httponly,
        **kwargs,
    )


# reuse the documentation of the overridden method
set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__
def set_csp_header(self) -> None:
    """
    Set the Content-Security-Policy header.

    A new nonce for inline scripts is generated and stored in
    `self.nonce` on every call.
    """
    self.nonce = secrets.token_urlsafe(16)

    # the APM settings are only looked at when APM is enabled
    apm_settings = self.settings["ELASTIC_APM"] if self.apm_enabled else None

    script_src = ["'self'", f"'nonce-{self.nonce}'"]
    if apm_settings is not None and "INLINE_SCRIPT_HASH" in apm_settings:
        script_src += [
            f"'sha256-{apm_settings['INLINE_SCRIPT_HASH']}'",
            "'unsafe-inline'",  # for browsers that don't support hash
        ]

    connect_src = ["'self'"]
    if apm_settings is not None and "SERVER_URL" in apm_settings:
        rum_server_url = apm_settings.get("RUM_SERVER_URL")
        if rum_server_url is None:
            # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
            connect_src.append(apm_settings["SERVER_URL"])
        elif rum_server_url:
            # the RUM agent needs to connect to rum_server_url
            connect_src.append(rum_server_url)

    # fix for older browsers
    websocket_scheme = "wss" if self.request.protocol == "https" else "ws"
    connect_src.append(websocket_scheme + f"://{self.request.host}")

    if self.settings.get("REPORTING"):
        report_uri = f"report-uri {self.get_reporting_api_endpoint()};"
    else:
        report_uri = ""

    policy = "".join(
        (
            "default-src 'self';",
            f"script-src {' '.join(script_src)};",
            f"connect-src {' '.join(connect_src)};",
            "style-src 'self' 'unsafe-inline';",
            "img-src 'self' https://img.zeit.de https://github.asozial.org;",
            "frame-ancestors 'self';",
            "sandbox allow-downloads allow-same-origin allow-modals"
            " allow-popups-to-escape-sandbox allow-scripts allow-popups"
            " allow-top-navigation-by-user-activation allow-forms;",
            "report-to default;",
            "base-uri 'none';",
            report_uri,
        )
    )
    self.set_header("Content-Security-Policy", policy)
1045 @override
1046 def set_default_headers(self) -> None:
1047 """Set default headers."""
 # set_csp_header() also generates the nonce of this response
1048 self.set_csp_header()
1049 self.active_origin_trials = set()
 # Reporting API: the same endpoint is announced with the
 # Reporting-Endpoints and the Report-To header
1050 if self.settings.get("REPORTING"):
1051 endpoint = self.get_reporting_api_endpoint()
1052 self.set_header(
1053 "Reporting-Endpoints",
1054 f'default="{endpoint}"', # noqa: B907
1055 )
1056 self.set_header(
1057 "Report-To",
1058 json.dumps(
1059 {
1060 "group": "default",
1061 "max_age": 2592000,
1062 "endpoints": [{"url": endpoint}],
1063 },
1064 option=ORJSON_OPTIONS,
1065 ),
1066 )
1067 self.set_header("NEL", '{"report_to":"default","max_age":2592000}')
1068 self.set_header("X-Content-Type-Options", "nosniff")
 # CORS: any origin and any header are allowed; the allowed methods are
 # those of get_allowed_methods()
1069 self.set_header("Access-Control-Max-Age", "7200")
1070 self.set_header("Access-Control-Allow-Origin", "*")
1071 self.set_header("Access-Control-Allow-Headers", "*")
1072 self.set_header(
1073 "Access-Control-Allow-Methods",
1074 ", ".join(self.get_allowed_methods()),
1075 )
1076 self.set_header("Cross-Origin-Resource-Policy", "cross-origin")
1077 self.set_header(
1078 "Permissions-Policy",
1079 "browsing-topics=(),"
1080 "identity-credentials-get=(),"
1081 "join-ad-interest-group=(),"
1082 "private-state-token-issuance=(),"
1083 "private-state-token-redemption=(),"
1084 "run-ad-auction=()",
1085 )
1086 self.set_header("Referrer-Policy", "same-origin")
1087 self.set_header(
1088 "Cross-Origin-Opener-Policy", "same-origin; report-to=default"
1089 )
 # /kaenguru-comics-alt is the only path that gets "credentialless"
 # instead of "require-corp"
1090 if self.request.path == "/kaenguru-comics-alt": # TODO: improve this
1091 self.set_header(
1092 "Cross-Origin-Embedder-Policy",
1093 "credentialless; report-to=default",
1094 )
1095 else:
1096 self.set_header(
1097 "Cross-Origin-Embedder-Policy",
1098 "require-corp; report-to=default",
1099 )
1100 if self.settings.get("HSTS"):
1101 self.set_header("Strict-Transport-Security", "max-age=63072000")
 # Onion-Location (onion address + path + query) is only set when an
 # onion address is configured and the request host is not an .onion host
1102 if (
1103 onion_address := self.settings.get("ONION_ADDRESS")
1104 ) and not self.request.host_name.endswith(".onion"):
1105 self.set_header(
1106 "Onion-Location",
1107 onion_address
1108 + self.request.path
1109 + (f"?{self.request.query}" if self.request.query else ""),
1110 )
1111 if self.settings.get("debug"):
1112 self.set_header("X-Debug", bool_to_str(True))
 # one X-Permission-<name> header per named permission, telling whether
 # the request is authorized for it
1113 for permission in Permission:
1114 if permission.name:
1115 self.set_header(
1116 f"X-Permission-{permission.name}",
1117 bool_to_str(bool(self.is_authorized(permission))),
1118 )
1119 self.set_header("Vary", "Accept, Authorization, Cookie")
 # reuse the documentation of the overridden method
1121 set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__
@classmethod
def supports_head(cls) -> bool:
    """Tell whether the get method of this handler can serve HEAD requests.

    That is the case if get has a keyword-only parameter named "head".
    """
    head_parameter = inspect.signature(cls.get).parameters.get("head")
    if head_parameter is None:
        return False
    return head_parameter.kind == inspect.Parameter.KEYWORD_ONLY
@cached_property
def user_settings(self) -> Options:
    """Return the user settings, created on first access and then cached."""
    options = Options(self)
    return options
@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:  # noqa: D102
    # No docstring here on purpose: the one of the parent class gets
    # assigned to write.__doc__ right after this definition.
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    self.set_content_type_header()

    # dicts get serialized by self.dump before anything else happens
    payload = self.dump(chunk) if isinstance(chunk, dict) else chunk

    now = self.now
    if now.date() == date(now.year, 4, 27):
        # On April 27th some words of textual output become "Stanley".

        def replace_word(match: Any) -> str:
            # Seeding with the word itself keeps the decision stable for
            # a given word; which words get picked depends on the year.
            word = match[0]
            if Random(word).randrange(5) == now.year % 5:
                return "Stanley"
            return word

        if isinstance(payload, bytes):
            # bytes that are not valid UTF-8 are passed through untouched
            with contextlib.suppress(UnicodeDecodeError):
                payload = payload.decode("UTF-8")
        if isinstance(payload, str):
            payload = regex.sub(
                r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b", replace_word, payload
            )

    super().write(payload)
1165 write.__doc__ = _RequestHandler.write.__doc__
1167 @override
1168 def write_error(self, status_code: int, **kwargs: Any) -> None:
1169 """Render the error page."""
1170 dict_content_types: tuple[str, str] = (
1171 "application/json",
1172 "application/yaml",
1173 )
1174 all_error_content_types: tuple[str, ...] = (
1175 # text/plain as first (default), to not screw up output in terminals
1176 "text/plain",
1177 "text/html",
1178 "text/markdown",
1179 *dict_content_types,
1180 "application/vnd.asozial.dynload+json",
1181 )
1183 if self.content_type not in all_error_content_types:
1184 # don't send 406, instead default with text/plain
1185 self.handle_accept_header(all_error_content_types, strict=False)
1187 if self.content_type == "text/html":
1188 self.render( # type: ignore[unused-awaitable]
1189 "error.html",
1190 status=status_code,
1191 reason=self.get_error_message(**kwargs),
1192 description=self.get_error_page_description(status_code),
1193 is_traceback="exc_info" in kwargs
1194 and not issubclass(kwargs["exc_info"][0], HTTPError)
1195 and (
1196 self.settings.get("serve_traceback")
1197 or self.is_authorized(Permission.TRACEBACK)
1198 ),
1199 )
1200 return
1202 if self.content_type in dict_content_types:
1203 self.finish( # type: ignore[unused-awaitable]
1204 {
1205 "status": status_code,
1206 "reason": self.get_error_message(**kwargs),
1207 }
1208 )
1209 return
1211 self.finish( # type: ignore[unused-awaitable]
1212 f"{status_code} {self.get_error_message(**kwargs)}\n"
1213 )
1215 write_error.__doc__ = _RequestHandler.write_error.__doc__