Coverage for an_website / utils / base_request_handler.py: 79.032%
496 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 19:37 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 19:37 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
13# pylint: disable=too-many-lines
15"""
16The base request handler used by other modules.
18This should only contain the BaseRequestHandler class.
19"""
21from __future__ import annotations
23import contextlib
24import inspect
25import logging
26import secrets
27import sys
28import traceback
29import uuid
30from asyncio import Future
31from base64 import b64decode
32from collections.abc import Awaitable, Callable, Coroutine, Mapping
33from contextvars import ContextVar
34from datetime import date, datetime, timedelta, timezone, tzinfo
35from functools import cached_property, partial, reduce
36from random import Random, choice as random_choice
37from types import TracebackType
38from typing import Any, ClassVar, Final, cast, override
39from urllib.parse import SplitResult, urlsplit, urlunsplit
40from zoneinfo import ZoneInfo
42import elasticapm
43import html2text
44import orjson as json
45import regex
46import tornado.web
47import yaml
48from accept_types import get_best_match # type: ignore[import-untyped]
49from ansi2html import Ansi2HTMLConverter
50from bs4 import BeautifulSoup
51from dateutil.easter import easter
52from elastic_transport import ApiError, TransportError
53from elasticsearch import AsyncElasticsearch
54from openmoji_dist import VERSION as OPENMOJI_VERSION
55from redis.asyncio import Redis
56from tornado.httputil import HTTPServerRequest
57from tornado.iostream import StreamClosedError
58from tornado.web import (
59 Finish,
60 GZipContentEncoding,
61 HTTPError,
62 MissingArgumentError,
63 OutputTransform,
64)
66from .. import (
67 EVENT_ELASTICSEARCH,
68 EVENT_REDIS,
69 GH_ORG_URL,
70 GH_PAGES_URL,
71 GH_REPO_URL,
72 NAME,
73 ORJSON_OPTIONS,
74 pytest_is_running,
75)
76from .decorators import is_authorized
77from .options import ColourScheme, Options
78from .static_file_handling import FILE_HASHES_DICT, fix_static_path
79from .themes import THEMES
80from .utils import (
81 ModuleInfo,
82 Permission,
83 add_args_to_url,
84 ansi_replace,
85 apply,
86 backspace_replace,
87 bool_to_str,
88 emoji2html,
89 geoip,
90 hash_bytes,
91 is_prime,
92 ratelimit,
93 str_to_bool,
94)
# Logger for this module.
LOGGER: Final = logging.getLogger(__name__)

# Content types outside of "text/*" that are still treated as text;
# BaseRequestHandler._finish() makes sure such responses end with a newline.
TEXT_CONTENT_TYPES: Final[set[str]] = {
    f"application/{subtype}"
    for subtype in (
        "javascript",
        "json",
        "vnd.asozial.dynload+json",
        "x-ndjson",
        "xml",
        "yaml",
    )
}

# NOTE(review): presumably the candidate values for an "X-Clacks-Overhead"
# response header; the place where they are used is not in this part of
# the file, confirm there.
CLACKS_OVERHEADS = tuple(
    f"GNU {name}"
    for name in (
        "Aaron Swartz",
        "Carol Angie Deborah Maltesi",
        "Charlotte Angie",
        "Terry Pratchett",
    )
)

# Holds the request that is currently being executed;
# it is set in _RequestHandler._execute().
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")
class _RequestHandler(tornado.web.RequestHandler):
    """
    Base for Tornado request handlers.

    Provides access to the shared clients stored in the application
    settings (APM, Elasticsearch, Redis), the request time in the
    timezone of the client, redirects to the canonical domain
    and ratelimiting.
    """

    # True if the User-Agent contains the CRAWLER_SECRET (see prepare())
    crawler: bool = False

    @override
    async def _execute(
        self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
    ) -> None:
        # publish the request in the context variable before handling it
        request_ctx_var.set(self.request)
        return await super()._execute(transforms, *args, **kwargs)

    # pylint: disable-next=protected-access
    _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__

    @property
    def apm_client(self) -> None | elasticapm.Client:
        """Get the APM client from the settings."""
        return self.settings.get("ELASTIC_APM", {}).get("CLIENT")  # type: ignore[no-any-return]

    @property
    def apm_enabled(self) -> bool:
        """Return whether APM is enabled."""
        return bool(self.settings.get("ELASTIC_APM", {}).get("ENABLED"))

    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        pass

    data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__

    @property
    def elasticsearch(self) -> AsyncElasticsearch:
        """
        Get the Elasticsearch client from the settings.

        This is None if Elasticsearch is not enabled.
        """
        return cast(AsyncElasticsearch, self.settings.get("ELASTICSEARCH"))

    @property
    def elasticsearch_prefix(self) -> str:
        """Get the Elasticsearch prefix from the settings."""
        return self.settings.get(  # type: ignore[no-any-return]
            "ELASTICSEARCH_PREFIX", NAME
        )

    def geoip(
        self,
        ip: None | str = None,
        database: str = geoip.__defaults__[0],  # type: ignore[index]
        *,
        allow_fallback: bool = True,
    ) -> Coroutine[None, None, None | dict[str, Any]]:
        """
        Get GeoIP information.

        If no IP is given, the remote IP of the request is used.
        Elasticsearch is only passed on when it is available.
        """
        if not ip:
            ip = self.request.remote_ip
        if not EVENT_ELASTICSEARCH.is_set():
            return geoip(ip, database)
        return geoip(
            ip, database, self.elasticsearch, allow_fallback=allow_fallback
        )

    async def get_time(self) -> datetime:
        """
        Get the start time of the request in the users' timezone.

        UTC is used if the GeoIP lookup fails, if it yields no timezone
        or if the timezone is not known to this system.
        """
        tz: tzinfo = timezone.utc
        try:
            geoip_info = await self.geoip()
        except (ApiError, TransportError):
            LOGGER.exception("Elasticsearch request failed")
            if self.apm_client:
                self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
        else:
            if geoip_info and "timezone" in geoip_info:
                try:
                    tz = ZoneInfo(geoip_info["timezone"])
                except (KeyError, ValueError):
                    # ZoneInfoNotFoundError is a subclass of KeyError and
                    # malformed keys raise ValueError; a bad timezone must
                    # not make the whole request fail, so stay with UTC
                    LOGGER.warning(
                        "Unknown timezone %r", geoip_info["timezone"]
                    )
        return datetime.fromtimestamp(
            self.request._start_time, tz=tz  # pylint: disable=protected-access
        )

    def is_authorized(
        self, permission: Permission, allow_cookie_auth: bool = True
    ) -> bool | None:
        """Check whether the request is authorized."""
        return is_authorized(self, permission, allow_cookie_auth)

    @override
    def log_exception(
        self,
        typ: None | type[BaseException],
        value: None | BaseException,
        tb: None | TracebackType,
    ) -> None:
        if isinstance(value, HTTPError):
            # let Tornado handle its own errors
            super().log_exception(typ, value, tb)
        elif typ is StreamClosedError:
            # the client went away, this is not worth an error
            LOGGER.debug(
                "Stream closed %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )
        else:
            LOGGER.error(
                "Uncaught exception %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )

    log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__

    @cached_property
    def now(self) -> datetime:
        """
        Get the time of the request.

        prepare() normally assigns this attribute; this fallback only
        runs if it is read before that happened.
        """
        # pylint: disable=method-hidden
        if pytest_is_running():
            raise AssertionError("Now accessed before it was set")
        # if self.request.method in self.SUPPORTED_METHODS:  # Why?
        LOGGER.error("Now accessed before it was set", stacklevel=3)
        return self.now_utc

    @cached_property
    def now_utc(self) -> datetime:
        """Get the start time of the request in UTC."""
        return datetime.fromtimestamp(
            self.request._start_time,  # pylint: disable=protected-access
            tz=timezone.utc,
        )

    @override
    async def prepare(self) -> None:
        """
        Prepare the handling of the request.

        Sets self.now, detects crawlers, redirects GET and HEAD requests
        to the canonical domain and applies the ratelimits.
        """
        # pylint: disable=invalid-overridden-method
        self.now = await self.get_time()

        if crawler_secret := self.settings.get("CRAWLER_SECRET"):
            self.crawler = crawler_secret in self.request.headers.get(
                "User-Agent", ""
            )

        if (
            self.request.method in {"GET", "HEAD"}
            and self.redirect_to_canonical_domain()
        ):
            return

        # the handler specific ratelimit is only checked
        # if the global ratelimit did not already hit
        if self.request.method != "OPTIONS" and not await self.ratelimit(True):
            await self.ratelimit()

    async def ratelimit(self, global_ratelimit: bool = False) -> bool:
        """
        Take b1nzy to space using Redis.

        Returns True if the request got ratelimited; in that case
        an error response (429, or 420 on April 20th) was written.
        Raises HTTPError(503) if Redis is not available.
        """
        if (
            not self.settings.get("RATELIMITS")
            or self.request.method == "OPTIONS"
            or self.is_authorized(Permission.RATELIMITS)
            or self.crawler
        ):
            return False

        if not EVENT_REDIS.is_set():
            LOGGER.warning(
                (
                    "Ratelimits are enabled, but Redis is not available. "
                    "This can happen shortly after starting the website."
                ),
            )
            raise HTTPError(503)

        if global_ratelimit:  # TODO: add to _RequestHandler
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=None,
                max_burst=99,  # limit = 100
                count_per_period=20,  # 20 requests per second
                period=1,
                tokens=10 if self.settings.get("UNDER_ATTACK") else 1,
            )
        else:
            # HEAD requests share the configuration of GET requests
            method = (
                "GET" if self.request.method == "HEAD" else self.request.method
            )
            if not (limit := getattr(self, f"RATELIMIT_{method}_LIMIT", 0)):
                return False
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=getattr(
                    self,
                    f"RATELIMIT_{method}_BUCKET",
                    self.__class__.__name__.lower(),
                ),
                max_burst=limit - 1,
                count_per_period=getattr(  # request count per period
                    self,
                    f"RATELIMIT_{method}_COUNT_PER_PERIOD",
                    30,
                ),
                period=getattr(
                    self, f"RATELIMIT_{method}_PERIOD", 60  # period in seconds
                ),
                # HEAD requests do not consume tokens
                tokens=1 if self.request.method != "HEAD" else 0,
            )

        for header, value in headers.items():
            self.set_header(header, value)

        if ratelimited:
            status = (
                420 if self.now.date() == date(self.now.year, 4, 20) else 429
            )
            self.set_status(status)
            self.write_error(status)

        return ratelimited

    def redirect_to_canonical_domain(self) -> bool:
        """
        Redirect to the canonical domain.

        Returns True if a redirect was sent. No redirect happens if no
        DOMAIN is configured, the request has no Host header, the host
        is already the canonical one or ends with ".onion" or ".i2p",
        or the path consists only of Braille pattern characters.
        """
        if (
            not (domain := self.settings.get("DOMAIN"))
            or not self.request.headers.get("Host")
            or self.request.host_name == domain
            or self.request.host_name.endswith((".onion", ".i2p"))
            or regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path)
        ):
            return False
        try:
            port = urlsplit(f"//{self.request.headers['Host']}").port
        except ValueError:
            # the Host header is supplied by the client; a malformed or
            # out-of-range port must not turn the redirect into an error
            port = None
        self.redirect(
            urlsplit(self.request.full_url())
            ._replace(netloc=f"{domain}:{port}" if port else domain)
            .geturl(),
            permanent=True,
        )
        return True

    @property
    def redis(self) -> Redis[str]:
        """
        Get the Redis client from the settings.

        This is None if Redis is not enabled.
        """
        return cast("Redis[str]", self.settings.get("REDIS"))

    @property
    def redis_prefix(self) -> str:
        """Get the Redis prefix from the settings."""
        return self.settings.get(  # type: ignore[no-any-return]
            "REDIS_PREFIX", NAME
        )
372class BaseRequestHandler(_RequestHandler):
373 """The base request handler used by every page and API."""
375 # pylint: disable=too-many-instance-attributes, too-many-public-methods
377 ELASTIC_RUM_URL: ClassVar[str] = (
378 f"/@apm-rum/elastic-apm-rum.umd{'' if sys.flags.dev_mode else '.min'}.js"
379 "?v=5.12.0"
380 )
382 COMPUTE_ETAG: ClassVar[bool] = True
383 ALLOW_COMPRESSION: ClassVar[bool] = True
384 MAX_BODY_SIZE: ClassVar[None | int] = None
385 ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
386 POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()
388 module_info: ModuleInfo
389 # info about page, can be overridden in module_info
390 title: str = "Das Asoziale Netzwerk"
391 short_title: str = "Asoziales Netzwerk"
392 description: str = "Die tolle Webseite des Asozialen Netzwerks"
394 used_render: bool = False
396 active_origin_trials: set[str]
397 content_type: None | str = None
398 apm_script: None | str
399 nonce: str
401 def _finish(
402 self, chunk: None | str | bytes | dict[str, Any] = None
403 ) -> Future[None]:
404 if self._finished:
405 raise RuntimeError("finish() called twice")
407 if chunk is not None:
408 self.write(chunk)
410 if ( # pylint: disable=too-many-boolean-expressions
411 (content_type := self.content_type)
412 and (
413 content_type in TEXT_CONTENT_TYPES
414 or content_type.startswith("text/")
415 or content_type.endswith(("+xml", "+json"))
416 )
417 and self._write_buffer
418 and not self._write_buffer[-1].endswith(b"\n")
419 ):
420 self.write(b"\n")
422 return super().finish()
424 @override
425 def compute_etag(self) -> None | str:
426 """Compute ETag with Base85 encoding."""
427 if not self.COMPUTE_ETAG:
428 return None
429 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907
431 @override
432 def decode_argument( # noqa: D102
433 self, value: bytes, name: str | None = None
434 ) -> str:
435 try:
436 return value.decode("UTF-8", "replace")
437 except UnicodeDecodeError as exc:
438 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}"
439 LOGGER.exception(err_msg, exc_info=exc)
440 raise HTTPError(400, err_msg) from exc
@property
def dump(self) -> Callable[[Any], str | bytes]:
    """
    Get the function for dumping the output.

    JSON content types are dumped as JSON (indented if the "pretty"
    argument is true), "application/yaml" is dumped as YAML and all
    other content types are passed through unchanged.
    """
    content_type = self.content_type
    as_json = content_type in {
        "application/json",
        "application/vnd.asozial.dynload+json",
    }

    if content_type == "application/yaml":
        now = self.now
        if (now.day, now.month) != (1, 4):
            return lambda spam: yaml.dump(
                spam,
                width=self.get_int_argument("yaml_width", 80, min_=80),
            )
        # on April 1st the YAML output is JSON, which is a subset of YAML
        as_json = True

    if not as_json:
        return lambda spam: spam

    option = ORJSON_OPTIONS
    if self.get_bool_argument("pretty", False):
        option |= json.OPT_INDENT_2
    return lambda spam: json.dumps(spam, option=option)
467 @override
468 def finish( # noqa: D102
469 self, chunk: None | str | bytes | dict[Any, Any] = None
470 ) -> Future[None]:
471 as_json = self.content_type == "application/vnd.asozial.dynload+json"
472 as_plain_text = self.content_type == "text/plain"
473 as_markdown = self.content_type == "text/markdown"
475 if (
476 not isinstance(chunk, bytes | str)
477 or self.content_type == "text/html"
478 or not self.used_render
479 or not (as_json or as_plain_text or as_markdown)
480 ):
481 return self._finish(chunk)
483 chunk = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk
485 if as_markdown:
486 return self._finish(
487 f"# {self.title}\n\n"
488 + html2text.html2text(chunk, self.request.full_url()).strip()
489 )
491 soup = BeautifulSoup(chunk, features="lxml")
493 if as_plain_text:
494 return self._finish(soup.get_text("\n", True))
496 dictionary: dict[str, object] = {
497 "url": self.fix_url(include_protocol_and_host=True),
498 "title": self.title,
499 "short_title": (
500 self.short_title if self.title != self.short_title else None
501 ),
502 "body": "".join(
503 str(element)
504 for element in soup.find_all(name="main")[0].contents
505 ).strip(),
506 "scripts": [
507 {"script": script.string} | script.attrs
508 for script in soup.find_all("script")
509 ],
510 "stylesheets": [
511 stylesheet.get("href").strip()
512 for stylesheet in soup.find_all("link", rel="stylesheet")
513 ],
514 "css": "\n".join(style.string for style in soup.find_all("style")),
515 }
517 return self._finish(dictionary)
519 finish.__doc__ = _RequestHandler.finish.__doc__
def finish_dict(self, **kwargs: Any) -> Future[None]:
    """Finish the request with the keyword arguments as a dictionary."""
    dictionary: dict[str, Any] = kwargs
    return self.finish(dictionary)
def fix_url(
    self,
    url: None | str | SplitResult = None,
    new_path: None | str = None,
    include_protocol_and_host: bool | str = False,
    query_args: Mapping[str, None | str | bool | float] | None = None,
) -> str:
    """
    Fix a URL and return it.

    A URL of another host is returned unchanged, unless the user wants
    to be asked before leaving and the redirect module is loaded; then
    a link to the redirect page is returned. A URL of this host gets a
    normalized path and the user settings as query arguments. Protocol
    and host are only part of the result if requested.
    """
    args = dict(query_args or {})
    request = self.request

    if url is None:
        url = request.full_url()
    if isinstance(url, str):
        url = urlsplit(url)

    if url.netloc and url.netloc.lower() != request.host.lower():
        # the URL points to another website
        if not (
            self.user_settings.ask_before_leaving
            and self.settings.get("REDIRECT_MODULE_LOADED")
        ):
            return url.geturl()
        path = "/redirect"
        args["to"] = url.geturl()
        url = urlsplit(request.full_url())
    elif new_path is None:
        path = url.path
    else:
        path = new_path

    # paths are lower case with a single leading slash,
    # the only exception is /LOLWUT
    path = "/" + path.strip("/").lower()
    if path == "/lolwut":
        path = path.upper()

    if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
        # these URLs get every option set to None
        args.update(dict.fromkeys(self.user_settings.iter_option_names()))
    else:
        current = self.user_settings.as_dict_with_str_values()
        for key, value in current.items():
            args.setdefault(key, value)
        stored = self.user_settings.as_dict_with_str_values(
            include_query_argument=False,
            include_body_argument=request.path == "/einstellungen"
            and self.get_bool_argument("save_in_cookie", False),
        )
        # options that equal the value known without the query
        # arguments are set to None
        for key, value in stored.items():
            if value == args[key]:
                args[key] = None

    origin = f"{request.protocol}://{request.host}"
    result = add_args_to_url(
        urlunsplit(
            (request.protocol, request.host, path, url.query, url.fragment)
        ),
        **args,
    )

    if include_protocol_and_host:
        return result
    return result.removeprefix(origin)
@classmethod
def get_allowed_methods(cls) -> list[str]:
    """
    Get the allowed methods as a sorted list.

    OPTIONS is always allowed; HEAD is allowed if GET is allowed
    and the handler supports HEAD.
    """
    allowed = set(cls.ALLOWED_METHODS)
    allowed.add("OPTIONS")
    if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
        allowed.add("HEAD")
    return sorted(allowed)
def get_bool_argument(
    self,
    name: str,
    default: None | bool = None,
) -> bool:
    """
    Get an argument parsed as boolean.

    Without a default the argument is required and an unparsable
    value raises HTTPError(400); with a default, the default is
    handed to str_to_bool() as fallback.
    """
    if default is None:
        value = str(self.get_argument(name))
        try:
            return str_to_bool(value)
        except ValueError as err:
            raise HTTPError(400, f"{value} is not a boolean") from err
    return str_to_bool(self.get_argument(name, ""), default)
def get_display_scheme(self) -> ColourScheme:
    """
    Get the scheme currently displayed.

    The "random" scheme is resolved to "light" or "dark" based on
    the parity of the microsecond of the request time.
    """
    scheme = self.user_settings.scheme
    if scheme != "random":
        return scheme
    return "dark" if self.now.microsecond & 1 else "light"
def get_display_theme(self) -> str:
    """
    Get the theme currently displayed.

    The default theme becomes "christmas" in December and the
    "random" theme is resolved to a random theme that is neither
    "random" nor "christmas".
    """
    theme = self.user_settings.theme

    if theme == "default" and self.now.month == 12:
        return "christmas"

    if theme == "random":
        excluded = ("random", "christmas")
        candidates = tuple(name for name in THEMES if name not in excluded)
        return random_choice(candidates)  # nosec: B311

    return theme
def get_error_message(self, **kwargs: Any) -> str:
    """
    Get the error message and return it.

    For exceptions that are not HTTP errors the traceback is returned
    if the serve_traceback setting is true or the request is authorized
    to see tracebacks, otherwise only the exception itself. For missing
    arguments the log message of the exception is returned and in all
    other cases the reason of the response.
    """
    if "exc_info" not in kwargs:
        return str(self._reason)

    exc_info = kwargs["exc_info"]
    exc_type = exc_info[0]

    if not issubclass(exc_type, HTTPError):
        show_traceback = self.settings.get(
            "serve_traceback"
        ) or self.is_authorized(Permission.TRACEBACK)
        if show_traceback:
            lines = traceback.format_exception(*exc_info)
        else:
            lines = traceback.format_exception_only(*exc_info[:2])
        return "".join(lines).strip()

    if issubclass(exc_type, MissingArgumentError):
        return cast(str, exc_info[1].log_message)

    return str(self._reason)
def get_error_page_description(self, status_code: int) -> str:
    """
    Get the description for the error page.

    Raises ValueError if the status code is not between 100 and 599.
    """
    # https://developer.mozilla.org/docs/Web/HTTP/Status
    if not 100 <= status_code <= 599:
        raise ValueError(
            f"{status_code} is not a valid HTTP response status code."
        )

    # status codes with their own description
    if status_code == 404:
        return f"{self.request.path} wurde nicht gefunden."
    if status_code == 451:
        return "Hier wäre bestimmt geiler Scheiß."

    # descriptions for the classes of status codes (1xx to 5xx)
    descriptions = {
        1: "Hier gibt es eine total wichtige Information.",
        2: "Hier ist alles super! 🎶🎶",
        3: "Eine Umleitung ist eingerichtet.",
        4: "Ein Client-Fehler ist aufgetreten.",
        5: "Ein Server-Fehler ist aufgetreten.",
    }
    return descriptions[status_code // 100]
def get_int_argument(
    self,
    name: str,
    default: None | int = None,
    *,
    max_: None | int = None,
    min_: None | int = None,
) -> int:
    """
    Get an argument parsed as integer.

    The value is parsed with base 0, so prefixes like "0x" work.
    Without a default the argument is required and an invalid value
    raises HTTPError(400); with a default, the default replaces a
    missing, empty or invalid value. The result is limited to max_
    first and to min_ afterwards.
    """
    if default is None:
        text = self.get_argument(name)
        try:
            value = int(text, base=0)
        except ValueError as err:
            raise HTTPError(400, f"{text} is not an integer") from err
    else:
        value = default
        if self.get_argument(name, ""):
            try:
                value = int(self.get_argument(name), base=0)
            except ValueError:
                value = default

    if max_ is not None and value > max_:
        value = max_
    if min_ is not None and value < min_:
        value = min_

    return value
def get_module_infos(self) -> tuple[ModuleInfo, ...]:
    """Get the module infos from the settings (empty if there are none)."""
    module_infos = self.settings.get("MODULE_INFOS")
    return module_infos if module_infos else ()
def get_reporting_api_endpoint(self) -> None | str:
    """
    Get the endpoint for the Reporting API™️.

    Returns None if reporting is disabled. An endpoint starting
    with "/" is prefixed with the protocol and host of the request.
    """
    if not self.settings.get("REPORTING"):
        return None

    endpoint = self.settings.get("REPORTING_ENDPOINT")

    if endpoint and endpoint.startswith("/"):
        return f"{self.request.protocol}://{self.request.host}{endpoint}"

    return endpoint
@override
def get_template_namespace(self) -> dict[str, Any]:
    """
    Add useful things to the template namespace and return it.

    The namespace of the parent class is extended with the user
    settings, information about the page (title, description,
    canonical URL, ...), helper functions for the templates and
    some flags that depend on the time of the request.
    """
    namespace = super().get_template_namespace()
    namespace.update(self.user_settings.as_dict())

    # converts ANSI escape sequences to HTML with inline styles
    convert_ansi = partial(
        Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
    )

    if self.apm_enabled:
        apm_script = self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
    else:
        apm_script = None

    now = self.now
    request = self.request

    # true on April 1st or if the cookie "c" is set to a true value
    c = now.date() == date(now.year, 4, 1) or str_to_bool(
        self.get_cookie("c", "f") or "f", False
    )

    # the canonical URL has no query and no trailing slash;
    # it is lower case, except for URLs starting with /LOLWUT
    canonical_host = self.settings["DOMAIN"] or request.host
    if request.path.upper().startswith("/LOLWUT"):
        cased_url = request.full_url().upper()
    else:
        cased_url = request.full_url().lower()
    canonical_url = (
        request.protocol
        + "://"
        + canonical_host
        + self.fix_url(cased_url).split("?")[0].removesuffix("/")
    )

    display_theme = self.get_display_theme()
    display_scheme = self.get_display_scheme()

    # how emojis are put into the page depends on the openmoji setting
    if self.user_settings.openmoji == "img":
        emoji_to_html = emoji2html
    elif self.user_settings.openmoji:
        emoji_to_html = (  # noqa: E731
            lambda emoji: f'<span class="openmoji">{emoji}</span>'
        )
    else:
        emoji_to_html = lambda emoji: f"<span>{emoji}</span>"  # noqa: E731

    keywords = "Asoziales Netzwerk, Känguru-Chroniken"
    if self.module_info:  # type: ignore[truthy-bool]
        keywords += (
            f", {self.module_info.get_keywords_as_str(self.request.path)}"
        )

    namespace.update(
        ansi2html=partial(
            reduce, apply, (convert_ansi, ansi_replace, backspace_replace)
        ),
        apm_script=apm_script,
        as_html=self.content_type == "text/html",
        c=c,
        canonical_url=canonical_url,
        description=self.description,
        display_theme=display_theme,
        display_scheme=display_scheme,
        elastic_rum_url=self.ELASTIC_RUM_URL,
        fix_static=lambda path: self.fix_url(fix_static_path(path)),
        fix_url=self.fix_url,
        emoji2html=emoji_to_html,
        form_appendix=self.user_settings.get_form_appendix(),
        GH_ORG_URL=GH_ORG_URL,
        GH_PAGES_URL=GH_PAGES_URL,
        GH_REPO_URL=GH_REPO_URL,
        keywords=keywords,
        lang="de",  # TODO: add language support
        nonce=self.nonce,
        now=now,
        openmoji_version=OPENMOJI_VERSION,
        settings=self.settings,
        short_title=self.short_title,
        testing=pytest_is_running(),
        title=self.title,
    )

    # true on the day returned by easter() and the day after it
    since_easter = now.date() - easter(now.year)
    namespace["🥚"] = timedelta() <= since_easter < timedelta(days=2)
    namespace["🦘"] = is_prime(now.microsecond)

    return namespace
def get_user_id(self) -> str:
    """
    Get the user id saved in the cookie or create one.

    The cookie is accepted for 90 days. It is set again if there
    is no cookie or if the cookie is older than 30 days.
    """
    stored = self.get_secure_cookie("user_id", max_age_days=90, min_version=2)
    if stored:
        user_id = stored.decode("UTF-8")
    else:
        user_id = str(uuid.uuid4())

    # save it in cookie or reset expiry date
    recent = self.get_secure_cookie("user_id", max_age_days=30, min_version=2)
    if not recent:
        self.set_secure_cookie(
            "user_id",
            user_id,
            expires_days=90,
            path="/",
            samesite="Strict",
        )

    return user_id
def handle_accept_header(  # pylint: disable=inconsistent-return-statements
    self, possible_content_types: tuple[str, ...], strict: bool = True
) -> None:
    """
    Handle the Accept header and set `self.content_type`.

    Nothing happens without possible content types. If none of them
    matches the Accept header, the request is either rejected (strict)
    or the first possible content type is used.
    """
    if not possible_content_types:
        return

    accept = self.request.headers.get("Accept") or "*/*"
    best_match = get_best_match(accept, possible_content_types)

    if best_match is None:
        if strict:
            return self.handle_not_acceptable(possible_content_types)
        best_match = possible_content_types[0]

    self.content_type = best_match
    self.set_content_type_header()
def handle_not_acceptable(
    self, possible_content_types: tuple[str, ...]
) -> None:
    """
    Only call this if we cannot respect the Accept header.

    Responds with status 406 and a body that lists the possible
    content types, one per line.
    """
    self.clear_header("Content-Type")
    self.set_status(406)
    body = "\n".join(possible_content_types) + "\n"
    raise Finish(body)
def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
    """
    Handle HEAD requests.

    Calls get() with the additional keyword argument head=True.
    Raises HTTPError(405) if get() is the one defined in tornado.web
    and HTTPError(501) if the handler does not support HEAD.
    """
    get = self.get
    if get.__module__ == "tornado.web":
        raise HTTPError(405)
    if not self.supports_head():
        raise HTTPError(501)

    return get(*args, **{**kwargs, "head": True})
871 @override
872 def initialize(
873 self,
874 *,
875 module_info: ModuleInfo,
876 # default is true, because then empty args dicts are
877 # enough to specify that the defaults should be used
878 default_title: bool = True,
879 default_description: bool = True,
880 ) -> None:
881 """
882 Get title and description from the kwargs.
884 If title and description are present in the kwargs,
885 then they override self.title and self.description.
886 """
887 self.module_info = module_info
888 if not default_title:
889 page_info = self.module_info.get_page_info(self.request.path)
890 self.title = page_info.name
891 self.short_title = page_info.short_name or self.title
892 if not default_description:
893 self.description = self.module_info.get_page_info(
894 self.request.path
895 ).description
897 @override
898 async def options(self, *args: Any, **kwargs: Any) -> None:
899 """Handle OPTIONS requests."""
900 # pylint: disable=unused-argument
901 self.set_header("Allow", ", ".join(self.get_allowed_methods()))
902 self.set_status(204)
903 await self.finish()
def origin_trial(self, token: bytes | str) -> bool:
    """
    Enable an experimental feature.

    Returns True if the feature of the token is active afterwards.
    The token is only sent in the Origin-Trial header if it has not
    expired and its origin matches the URL of the request.
    """
    # pylint: disable=protected-access
    # The first 69 bytes of the decoded token are skipped without being
    # inspected; the rest is the JSON payload.
    # NOTE(review): presumably version, signature and payload length of
    # the token format - confirm against the origin trial documentation.
    payload = json.loads(b64decode(token)[69:])
    feature = payload["feature"]
    if feature in self.active_origin_trials:
        return True

    origin = urlsplit(payload["origin"])
    url = urlsplit(self.request.full_url())
    if url.port is None and url.scheme in {"http", "https"}:
        # make the default port explicit for the comparison
        default_port = 443 if url.scheme == "https" else 80
        url = url._replace(netloc=f"{url.hostname}:{default_port}")

    if self.request._start_time > payload["expiry"]:
        return False
    if url.scheme != origin.scheme:
        return False

    same_netloc = url.netloc == origin.netloc
    if not same_netloc:
        is_subdomain = payload.get("isSubdomain") and url.netloc.endswith(
            f".{origin.netloc}"
        )
        if not is_subdomain:
            return False

    self.add_header("Origin-Trial", token)
    self.active_origin_trials.add(feature)
    return True
@override
async def prepare(self) -> None:
    """
    Prepare the handling of the request.

    Runs the preparation of the parent class and stops if that
    finished the request. Afterwards compression gets disabled if the
    handler does not allow it, the Accept header is handled, the
    cookie "c" is set occasionally for GET requests and bodies larger
    than MAX_BODY_SIZE are rejected with HTTPError(413).
    """
    await super().prepare()

    if self._finished:
        return

    if not self.ALLOW_COMPRESSION:
        gzip_transforms = (
            transform
            for transform in self._transforms
            if isinstance(transform, GZipContentEncoding)
        )
        for transform in gzip_transforms:
            # pylint: disable=protected-access
            transform._gzipping = False

    self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)

    if self.request.method == "GET":
        # the number is derived from the time of the request
        days = Random(self.now.timestamp()).randint(0, 31337)
        if days in {69, 420, 1337, 31337}:
            self.set_cookie("c", "s", expires_days=days / 24, path="/")

    if self.request.method != "OPTIONS" and self.MAX_BODY_SIZE is not None:
        if len(self.request.body) > self.MAX_BODY_SIZE:
            LOGGER.warning(
                "%s > MAX_BODY_SIZE (%s)",
                len(self.request.body),
                self.MAX_BODY_SIZE,
            )
            raise HTTPError(413)
@override
def render(  # noqa: D102
    self, template_name: str, **kwargs: Any
) -> Future[None]:
    # The docstring is copied from the parent class after this definition.
    # Remember that a template was rendered; finish() only converts
    # the output to other content types in that case.
    self.used_render = True
    future = super().render(template_name, **kwargs)
    return future
975 render.__doc__ = _RequestHandler.render.__doc__
def set_content_type_header(self) -> None:
    """
    Set the Content-Type header based on `self.content_type`.

    Content types starting with "text/" get the UTF-8 charset
    appended. Without a content type no header is set.
    """
    content_type = self.content_type
    if content_type is None:
        return

    if str(content_type).startswith("text/"):  # RFC 2616 (3.7.1)
        self.set_header("Content-Type", f"{content_type};charset=utf-8")
    else:
        self.set_header("Content-Type", content_type)
@override
def set_cookie(  # noqa: D102  # pylint: disable=too-many-arguments
    self,
    name: str,
    value: str | bytes,
    domain: None | str = None,
    expires: None | float | tuple[int, ...] | datetime = None,
    path: str = "/",
    expires_days: None | float = 400,  # changed
    *,
    secure: bool | None = None,
    httponly: bool = True,
    **kwargs: Any,
) -> None:
    # The docstring is copied from the parent class after this definition.
    # Differences to the parent class: cookies expire after 400 days by
    # default, are HttpOnly, use SameSite=Strict unless told otherwise
    # and are secure if the request was made via HTTPS.
    kwargs.setdefault("samesite", "Strict")

    if secure is None:
        secure = self.request.protocol == "https"

    super().set_cookie(
        name,
        value,
        domain,
        expires,
        path,
        expires_days,
        secure=secure,
        httponly=httponly,
        **kwargs,
    )
1018 set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__
def set_csp_header(self) -> None:
    """
    Set the Content-Security-Policy header.

    A new nonce for inline scripts is created and saved in self.nonce.
    If APM is enabled, its inline script and server are allowed too.
    """
    self.nonce = secrets.token_urlsafe(16)

    script_src = ["'self'", f"'nonce-{self.nonce}'"]
    if (
        self.apm_enabled
        and "INLINE_SCRIPT_HASH" in self.settings["ELASTIC_APM"]
    ):
        inline_script_hash = self.settings["ELASTIC_APM"]["INLINE_SCRIPT_HASH"]
        script_src.append(f"'sha256-{inline_script_hash}'")
        # for browsers that don't support hash
        script_src.append("'unsafe-inline'")

    connect_src = ["'self'"]
    if self.apm_enabled and "SERVER_URL" in self.settings["ELASTIC_APM"]:
        apm_settings = self.settings["ELASTIC_APM"]
        rum_server_url = apm_settings.get("RUM_SERVER_URL")
        if rum_server_url is None:
            # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
            connect_src.append(apm_settings["SERVER_URL"])
        elif rum_server_url:
            # the RUM agent needs to connect to rum_server_url
            connect_src.append(rum_server_url)

    # fix for older browsers
    websocket_scheme = "wss" if self.request.protocol == "https" else "ws"
    connect_src.append(f"{websocket_scheme}://{self.request.host}")

    sandbox = (
        "sandbox",
        "allow-downloads",
        "allow-same-origin",
        "allow-modals",
        "allow-popups-to-escape-sandbox",
        "allow-scripts",
        "allow-popups",
        "allow-top-navigation-by-user-activation",
        "allow-forms",
    )

    directives = [
        "default-src 'self'",
        f"script-src {' '.join(script_src)}",
        f"connect-src {' '.join(connect_src)}",
        "style-src 'self' 'unsafe-inline'",
        "img-src 'self' https://img.zeit.de https://github.asozial.org",
        "frame-ancestors 'self'",
        " ".join(sandbox),
        "report-to default",
        "base-uri 'none'",
    ]
    if self.settings.get("REPORTING"):
        directives.append(f"report-uri {self.get_reporting_api_endpoint()}")

    # every directive is terminated with a semicolon
    self.set_header(
        "Content-Security-Policy",
        "".join(f"{directive};" for directive in directives),
    )
@override
def set_default_headers(self) -> None:
    """
    Set the headers that are put on every response of this handler.

    Sets the CSP header first, resets ``self.active_origin_trials`` to an
    empty set and then adds reporting, CORS, isolation and miscellaneous
    headers, some of them depending on ``self.settings``.
    """
    self.set_csp_header()
    self.active_origin_trials = set()

    # reporting related headers, only when reporting is enabled
    if self.settings.get("REPORTING"):
        reporting_endpoint = self.get_reporting_api_endpoint()
        self.set_header(
            "Reporting-Endpoints",
            f'default="{reporting_endpoint}"',  # noqa: B907
        )
        report_to = {
            "group": "default",
            "max_age": 2592000,
            "endpoints": [{"url": reporting_endpoint}],
        }
        self.set_header(
            "Report-To", json.dumps(report_to, option=ORJSON_OPTIONS)
        )
        self.set_header("NEL", '{"report_to":"default","max_age":2592000}')

    self.set_header("X-Content-Type-Options", "nosniff")

    # CORS
    self.set_header("Access-Control-Max-Age", "7200")
    self.set_header("Access-Control-Allow-Origin", "*")
    self.set_header("Access-Control-Allow-Headers", "*")
    allowed_methods = ", ".join(self.get_allowed_methods())
    self.set_header("Access-Control-Allow-Methods", allowed_methods)
    self.set_header("Cross-Origin-Resource-Policy", "cross-origin")

    disabled_features = (
        "browsing-topics=()",
        "identity-credentials-get=()",
        "join-ad-interest-group=()",
        "private-state-token-issuance=()",
        "private-state-token-redemption=()",
        "run-ad-auction=()",
    )
    self.set_header("Permissions-Policy", ",".join(disabled_features))

    self.set_header("Referrer-Policy", "same-origin")
    self.set_header(
        "Cross-Origin-Opener-Policy", "same-origin;report-to=default"
    )

    # TODO: improve this
    embedder_policy = (
        "credentialless;report-to=default"
        if self.request.path == "/kaenguru-comics-alt"
        else "require-corp;report-to=default"
    )
    self.set_header("Cross-Origin-Embedder-Policy", embedder_policy)

    if self.settings.get("HSTS"):
        self.set_header("Strict-Transport-Security", "max-age=63072000")

    # advertise the onion service unless the request already came via it
    onion_address = self.settings.get("ONION_ADDRESS")
    if onion_address and not self.request.host_name.endswith(".onion"):
        query_suffix = (
            f"?{self.request.query}" if self.request.query else ""
        )
        self.set_header(
            "Onion-Location",
            onion_address + self.request.path + query_suffix,
        )

    if self.settings.get("debug"):
        self.set_header("X-Debug", bool_to_str(True))
        # one header per named permission
        for permission in Permission:
            if not permission.name:
                continue
            self.set_header(
                f"X-Permission-{permission.name}",
                bool_to_str(bool(self.is_authorized(permission))),
            )

    # pick an entry based on the microsecond of the current UTC time
    clacks_index = int(self.now_utc.microsecond) % len(CLACKS_OVERHEADS)
    self.set_header("X-Clacks-Overhead", CLACKS_OVERHEADS[clacks_index])

    self.set_header("Vary", "Accept,Authorization,Cookie")
1155 set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__
@classmethod
def supports_head(cls) -> bool:
    """
    Tell whether this request handler supports HEAD requests.

    That is the case when the ``get`` method of the class has a
    keyword-only parameter named ``head``.
    """
    head_parameter = inspect.signature(cls.get).parameters.get("head")
    if head_parameter is None:
        return False
    return head_parameter.kind == inspect.Parameter.KEYWORD_ONLY
@cached_property
def user_settings(self) -> Options:
    """
    Return the user settings of this request.

    The ``Options`` object is created from this handler on first access
    and then reused, because this is a ``cached_property``.
    """
    options = Options(self)
    return options
@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:
    """
    Write the given chunk to the output buffer.

    The Content-Type header is set before writing and dicts are
    serialized with ``self.dump``. On April 27th some capitalized words
    in textual output are replaced with "Stanley".

    Raises:
        RuntimeError: if the request is already finished.
    """
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    self.set_content_type_header()

    payload = self.dump(chunk) if isinstance(chunk, dict) else chunk

    if self.now.date() == date(self.now.year, 4, 27):
        if isinstance(payload, bytes):
            try:
                payload = payload.decode("UTF-8")
            except UnicodeDecodeError:
                # not valid UTF-8, pass the bytes on unchanged
                pass

        if isinstance(payload, str):

            def maybe_stanley(found: regex.Match[str]) -> str:
                """Replace the word depending on a word-seeded random."""
                word = found[0]
                # seeding with the word keeps the choice stable per word
                if Random(word).randrange(5) == self.now.year % 5:
                    return "Stanley"
                return word

            payload = regex.sub(
                r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b", maybe_stanley, payload
            )

    super().write(payload)
1199 write.__doc__ = _RequestHandler.write.__doc__
1201 @override
1202 def write_error(self, status_code: int, **kwargs: Any) -> None:
1203 """Render the error page."""
1204 dict_content_types: tuple[str, str] = (
1205 "application/json",
1206 "application/yaml",
1207 )
1208 all_error_content_types: tuple[str, ...] = (
1209 # text/plain as first (default), to not screw up output in terminals
1210 "text/plain",
1211 "text/html",
1212 "text/markdown",
1213 *dict_content_types,
1214 "application/vnd.asozial.dynload+json",
1215 )
1217 if self.content_type not in all_error_content_types:
1218 # don't send 406, instead default with text/plain
1219 self.handle_accept_header(all_error_content_types, strict=False)
1221 if self.content_type == "text/html":
1222 self.render( # type: ignore[unused-awaitable]
1223 "error.html",
1224 status=status_code,
1225 reason=self.get_error_message(**kwargs),
1226 description=self.get_error_page_description(status_code),
1227 is_traceback="exc_info" in kwargs
1228 and not issubclass(kwargs["exc_info"][0], HTTPError)
1229 and (
1230 self.settings.get("serve_traceback")
1231 or self.is_authorized(Permission.TRACEBACK)
1232 ),
1233 )
1234 return
1236 if self.content_type in dict_content_types:
1237 self.finish( # type: ignore[unused-awaitable]
1238 {
1239 "status": status_code,
1240 "reason": self.get_error_message(**kwargs),
1241 }
1242 )
1243 return
1245 self.finish( # type: ignore[unused-awaitable]
1246 f"{status_code} {self.get_error_message(**kwargs)}\n"
1247 )
1249 write_error.__doc__ = _RequestHandler.write_error.__doc__