Coverage for an_website/utils/base_request_handler.py: 78.659%
492 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-10-04 17:54 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-10-04 17:54 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
13# pylint: disable=too-many-lines
15"""
16The base request handler used by other modules.
18This should only contain the BaseRequestHandler class.
19"""
21from __future__ import annotations
23import contextlib
24import inspect
25import logging
26import secrets
27import sys
28import traceback
29import uuid
30from asyncio import Future
31from base64 import b64decode
32from collections.abc import Awaitable, Callable, Coroutine, Mapping
33from contextvars import ContextVar
34from datetime import date, datetime, timedelta, timezone, tzinfo
35from functools import cached_property, partial, reduce
36from random import Random, choice as random_choice
37from types import TracebackType
38from typing import Any, ClassVar, Final, cast, override
39from urllib.parse import SplitResult, urlsplit, urlunsplit
40from zoneinfo import ZoneInfo
42import elasticapm
43import html2text
44import orjson as json
45import regex
46import tornado.web
47import yaml
48from accept_types import get_best_match # type: ignore[import-untyped]
49from ansi2html import Ansi2HTMLConverter
50from bs4 import BeautifulSoup
51from dateutil.easter import easter
52from elastic_transport import ApiError, TransportError
53from elasticsearch import AsyncElasticsearch
54from openmoji_dist import VERSION as OPENMOJI_VERSION
55from redis.asyncio import Redis
56from tornado.httputil import HTTPServerRequest
57from tornado.iostream import StreamClosedError
58from tornado.web import (
59 Finish,
60 GZipContentEncoding,
61 HTTPError,
62 MissingArgumentError,
63 OutputTransform,
64)
66from .. import (
67 EVENT_ELASTICSEARCH,
68 EVENT_REDIS,
69 GH_ORG_URL,
70 GH_PAGES_URL,
71 GH_REPO_URL,
72 NAME,
73 ORJSON_OPTIONS,
74 pytest_is_running,
75)
76from .decorators import is_authorized
77from .options import ColourScheme, Options
78from .static_file_handling import FILE_HASHES_DICT, fix_static_path
79from .themes import THEMES
80from .utils import (
81 ModuleInfo,
82 Permission,
83 add_args_to_url,
84 ansi_replace,
85 apply,
86 backspace_replace,
87 bool_to_str,
88 emoji2html,
89 geoip,
90 hash_bytes,
91 is_prime,
92 ratelimit,
93 str_to_bool,
94)
# Logger shared by everything in this module.
LOGGER: Final = logging.getLogger(__name__)

# Content types that are treated as text although they do not start
# with "text/"; BaseRequestHandler._finish() terminates such responses
# with a newline.
TEXT_CONTENT_TYPES: Final[set[str]] = set(
    (
        "application/javascript",
        "application/json",
        "application/vnd.asozial.dynload+json",
        "application/x-ndjson",
        "application/xml",
        "application/yaml",
    )
)

# Set to the current request by _RequestHandler._execute().
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")
class _RequestHandler(tornado.web.RequestHandler):
    """Base for Tornado request handlers."""

    # Whether the User-Agent contains the CRAWLER_SECRET setting.
    # Set in prepare(); ratelimit() skips requests where this is true.
    crawler: bool = False

    @override
    async def _execute(
        self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
    ) -> None:
        # make the current request available through request_ctx_var
        request_ctx_var.set(self.request)
        return await super()._execute(transforms, *args, **kwargs)

    # pylint: disable-next=protected-access
    _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__

    @property
    def apm_client(self) -> None | elasticapm.Client:
        """Get the APM client from the settings."""
        return self.settings.get("ELASTIC_APM", {}).get("CLIENT")  # type: ignore[no-any-return]

    @property
    def apm_enabled(self) -> bool:
        """Return whether APM is enabled."""
        return bool(self.settings.get("ELASTIC_APM", {}).get("ENABLED"))

    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        # streamed request bodies are ignored by default
        pass

    data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__

    @property
    def elasticsearch(self) -> AsyncElasticsearch:
        """
        Get the Elasticsearch client from the settings.

        This is None if Elasticsearch is not enabled.
        """
        return cast(AsyncElasticsearch, self.settings.get("ELASTICSEARCH"))

    @property
    def elasticsearch_prefix(self) -> str:
        """Get the Elasticsearch prefix from the settings."""
        return self.settings.get(  # type: ignore[no-any-return]
            "ELASTICSEARCH_PREFIX", NAME
        )

    def geoip(
        self,
        ip: None | str = None,
        database: str = geoip.__defaults__[0],  # type: ignore[index]
        *,
        allow_fallback: bool = True,
    ) -> Coroutine[None, None, None | dict[str, Any]]:
        """
        Get GeoIP information.

        If no IP is given, the remote IP of the request is used.
        The Elasticsearch client and ``allow_fallback`` are only passed
        on while EVENT_ELASTICSEARCH is set.
        """
        if not ip:
            ip = self.request.remote_ip
        if not EVENT_ELASTICSEARCH.is_set():
            return geoip(ip, database)
        return geoip(
            ip, database, self.elasticsearch, allow_fallback=allow_fallback
        )

    async def get_time(self) -> datetime:
        """
        Get the start time of the request in the users' timezone.

        The timezone is taken from the GeoIP information of the remote IP.
        UTC is used if the lookup fails or yields no timezone.
        """
        tz: tzinfo = timezone.utc
        try:
            geoip = await self.geoip()  # pylint: disable=redefined-outer-name
        except (ApiError, TransportError):
            # a failing lookup should not fail the request
            LOGGER.exception("Elasticsearch request failed")
            if self.apm_client:
                self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
        else:
            if geoip and "timezone" in geoip:
                tz = ZoneInfo(geoip["timezone"])
        return datetime.fromtimestamp(
            self.request._start_time, tz=tz  # pylint: disable=protected-access
        )

    def is_authorized(
        self, permission: Permission, allow_cookie_auth: bool = True
    ) -> bool | None:
        """Check whether the request is authorized."""
        return is_authorized(self, permission, allow_cookie_auth)

    @override
    def log_exception(
        self,
        typ: None | type[BaseException],
        value: None | BaseException,
        tb: None | TracebackType,
    ) -> None:
        if isinstance(value, HTTPError):
            super().log_exception(typ, value, tb)
        elif typ is StreamClosedError:
            # closed streams are only logged at debug level
            LOGGER.debug(
                "Stream closed %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )
        else:
            LOGGER.error(
                "Uncaught exception %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )

    log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__

    @cached_property
    def now(self) -> datetime:
        """
        Get the current time.

        prepare() normally assigns this attribute. This fallback returns
        the start time of the request in UTC; it raises an AssertionError
        while pytest is running and logs an error for supported methods.
        """
        # pylint: disable=method-hidden
        if pytest_is_running():
            raise AssertionError("Now accessed before it was set")
        if self.request.method in self.SUPPORTED_METHODS:
            LOGGER.error("Now accessed before it was set", stacklevel=3)
        return datetime.fromtimestamp(
            self.request._start_time,  # pylint: disable=protected-access
            tz=timezone.utc,
        )

    @override
    async def prepare(self) -> None:
        """
        Prepare the request.

        Set self.now, detect the crawler, redirect GET and HEAD requests
        to the canonical domain and call self.ratelimit().
        """
        # pylint: disable=invalid-overridden-method
        self.now = await self.get_time()

        if crawler_secret := self.settings.get("CRAWLER_SECRET"):
            self.crawler = crawler_secret in self.request.headers.get(
                "User-Agent", ""
            )

        if (
            self.request.method in {"GET", "HEAD"}
            and self.redirect_to_canonical_domain()
        ):
            return

        # the per-handler ratelimit is only checked
        # if the global one did not trigger
        if self.request.method != "OPTIONS" and not await self.ratelimit(True):
            await self.ratelimit()

    async def ratelimit(self, global_ratelimit: bool = False) -> bool:
        """
        Take b1nzy to space using Redis.

        Return whether the request got ratelimited. In that case the
        error response (420 on April 20th, 429 otherwise) is already
        written. Raise HTTPError 503 if ratelimits are enabled, but
        Redis is not available.
        """
        if (
            not self.settings.get("RATELIMITS")
            or self.request.method == "OPTIONS"
            or self.is_authorized(Permission.RATELIMITS)
            or self.crawler
        ):
            return False

        if not EVENT_REDIS.is_set():
            LOGGER.warning(
                (
                    "Ratelimits are enabled, but Redis is not available. "
                    "This can happen shortly after starting the website."
                ),
            )
            raise HTTPError(503)

        if global_ratelimit:
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=None,
                max_burst=99,  # limit = 100
                count_per_period=20,  # 20 requests per second
                period=1,
                tokens=10 if self.settings.get("UNDER_ATTACK") else 1,
            )
        else:
            # HEAD requests share the configuration of GET requests
            method = (
                "GET" if self.request.method == "HEAD" else self.request.method
            )
            if not (limit := getattr(self, f"RATELIMIT_{method}_LIMIT", 0)):
                return False
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=getattr(
                    self,
                    f"RATELIMIT_{method}_BUCKET",
                    self.__class__.__name__.lower(),
                ),
                max_burst=limit - 1,
                count_per_period=getattr(  # request count per period
                    self,
                    f"RATELIMIT_{method}_COUNT_PER_PERIOD",
                    30,
                ),
                period=getattr(
                    self, f"RATELIMIT_{method}_PERIOD", 60  # period in seconds
                ),
                tokens=1 if self.request.method != "HEAD" else 0,
            )

        for header, value in headers.items():
            self.set_header(header, value)

        if ratelimited:
            if self.now.date() == date(self.now.year, 4, 20):
                self.set_status(420)
                self.write_error(420)
            else:
                self.set_status(429)
                self.write_error(429)

        return ratelimited

    def redirect_to_canonical_domain(self) -> bool:
        """
        Redirect to the canonical domain.

        Return whether a redirect was sent. Nothing happens if no DOMAIN
        is configured, the Host header is missing, the host already is the
        canonical one or ends with .onion or .i2p, or the path consists
        only of braille characters.
        """
        if (
            not (domain := self.settings.get("DOMAIN"))
            or not self.request.headers.get("Host")
            or self.request.host_name == domain
            or self.request.host_name.endswith((".onion", ".i2p"))
            or regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path)
        ):
            return False
        try:
            port = urlsplit(f"//{self.request.headers['Host']}").port
        except ValueError:
            # The Host header is sent by the client and may contain a
            # port that is not a number between 0 and 65535.
            # Redirect without a port instead of failing the request.
            port = None
        self.redirect(
            urlsplit(self.request.full_url())
            ._replace(netloc=f"{domain}:{port}" if port else domain)
            .geturl(),
            permanent=True,
        )
        return True

    @property
    def redis(self) -> Redis[str]:
        """
        Get the Redis client from the settings.

        This is None if Redis is not enabled.
        """
        return cast("Redis[str]", self.settings.get("REDIS"))

    @property
    def redis_prefix(self) -> str:
        """Get the Redis prefix from the settings."""
        return self.settings.get(  # type: ignore[no-any-return]
            "REDIS_PREFIX", NAME
        )
360class BaseRequestHandler(_RequestHandler):
361 """The base request handler used by every page and API."""
363 # pylint: disable=too-many-instance-attributes, too-many-public-methods
365 ELASTIC_RUM_URL: ClassVar[str] = (
366 f"/@apm-rum/elastic-apm-rum.umd{'' if sys.flags.dev_mode else '.min'}.js"
367 "?v=5.12.0"
368 )
370 COMPUTE_ETAG: ClassVar[bool] = True
371 ALLOW_COMPRESSION: ClassVar[bool] = True
372 MAX_BODY_SIZE: ClassVar[None | int] = None
373 ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
374 POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()
376 module_info: ModuleInfo
377 # info about page, can be overridden in module_info
378 title: str = "Das Asoziale Netzwerk"
379 short_title: str = "Asoziales Netzwerk"
380 description: str = "Die tolle Webseite des Asozialen Netzwerks"
382 used_render: bool = False
384 active_origin_trials: set[str]
385 content_type: None | str = None
386 apm_script: None | str
387 nonce: str
def _finish(
    self, chunk: None | str | bytes | dict[str, Any] = None
) -> Future[None]:
    """
    Write the last chunk and finish the response.

    Output with a textual content type gets terminated with a newline
    if it does not already end with one.
    """
    if self._finished:
        raise RuntimeError("finish() called twice")

    if chunk is not None:
        self.write(chunk)

    mime = self.content_type
    is_text = bool(mime) and (
        mime in TEXT_CONTENT_TYPES
        or mime.startswith("text/")
        or mime.endswith(("+xml", "+json"))
    )
    if is_text and self._write_buffer:
        last_chunk = self._write_buffer[-1]
        if not last_chunk.endswith(b"\n"):
            self.write(b"\n")

    return super().finish()
412 @override
413 def compute_etag(self) -> None | str:
414 """Compute ETag with Base85 encoding."""
415 if not self.COMPUTE_ETAG:
416 return None
417 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907
419 @override
420 def decode_argument( # noqa: D102
421 self, value: bytes, name: str | None = None
422 ) -> str:
423 try:
424 return value.decode("UTF-8", "replace")
425 except UnicodeDecodeError as exc:
426 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}"
427 LOGGER.exception(err_msg, exc_info=exc)
428 raise HTTPError(400, err_msg) from exc
@property
def dump(self) -> Callable[[Any], str | bytes]:
    """
    Get the function for dumping the output.

    JSON content types are dumped with orjson, YAML with PyYAML
    (except on April 1st, when JSON is used for YAML too).
    Every other content type gets a function returning its input.
    """
    mime = self.content_type
    as_json = mime in {
        "application/json",
        "application/vnd.asozial.dynload+json",
    }

    if mime == "application/yaml":
        today = self.now
        if (today.day, today.month) != (1, 4):
            # the width is read when the returned function gets called
            return lambda spam: yaml.dump(
                spam,
                width=self.get_int_argument("yaml_width", 80, min_=80),
            )
        as_json = True

    if not as_json:
        return lambda spam: spam

    option = ORJSON_OPTIONS
    if self.get_bool_argument("pretty", False):
        option |= json.OPT_INDENT_2
    return lambda spam: json.dumps(spam, option=option)
455 @override
456 def finish( # noqa: D102
457 self, chunk: None | str | bytes | dict[Any, Any] = None
458 ) -> Future[None]:
459 as_json = self.content_type == "application/vnd.asozial.dynload+json"
460 as_plain_text = self.content_type == "text/plain"
461 as_markdown = self.content_type == "text/markdown"
463 if (
464 not isinstance(chunk, bytes | str)
465 or self.content_type == "text/html"
466 or not self.used_render
467 or not (as_json or as_plain_text or as_markdown)
468 ):
469 return self._finish(chunk)
471 chunk = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk
473 if as_markdown:
474 return self._finish(
475 f"# {self.title}\n\n"
476 + html2text.html2text(chunk, self.request.full_url()).strip()
477 )
479 soup = BeautifulSoup(chunk, features="lxml")
481 if as_plain_text:
482 return self._finish(soup.get_text("\n", True))
484 dictionary: dict[str, object] = {
485 "url": self.fix_url(include_protocol_and_host=True),
486 "title": self.title,
487 "short_title": (
488 self.short_title if self.title != self.short_title else None
489 ),
490 "body": "".join(
491 str(element)
492 for element in soup.find_all(name="main")[0].contents
493 ).strip(),
494 "scripts": [
495 {"script": script.string} | script.attrs
496 for script in soup.find_all("script")
497 ],
498 "stylesheets": [
499 stylesheet.get("href").strip()
500 for stylesheet in soup.find_all("link", rel="stylesheet")
501 ],
502 "css": "\n".join(style.string for style in soup.find_all("style")),
503 }
505 return self._finish(dictionary)
# finish() has no docstring of its own; it takes the one of the parent class
finish.__doc__ = _RequestHandler.finish.__doc__
def finish_dict(self, **kwargs: Any) -> Future[None]:
    """Finish the request with the keyword arguments as dictionary."""
    payload: dict[str, Any] = kwargs
    return self.finish(payload)
def fix_url(
    self,
    url: None | str | SplitResult = None,
    new_path: None | str = None,
    include_protocol_and_host: bool | str = False,
    query_args: Mapping[str, None | str | bool | float] | None = None,
) -> str:
    """
    Fix a URL and return it.

    URLs of other hosts are returned unchanged or, if the user wants to
    be asked before leaving and the redirect module is loaded, replaced
    with a link to the redirect page. URLs of this website get the user
    settings appended as query arguments. The current URL is used if no
    URL is given.
    """
    args = dict(query_args) if query_args else {}

    if url is None:
        url = self.request.full_url()
    target = urlsplit(url) if isinstance(url, str) else url

    is_external = bool(target.netloc) and (
        target.netloc.lower() != self.request.host.lower()
    )
    if is_external:
        if not (
            self.user_settings.ask_before_leaving
            and self.settings.get("REDIRECT_MODULE_LOADED")
        ):
            return target.geturl()
        path = "/redirect"
        args["to"] = target.geturl()
        target = urlsplit(self.request.full_url())
    elif new_path is None:
        path = target.path
    else:
        path = new_path

    path = f"/{path.strip('/')}".lower()
    if path == "/lolwut":
        path = path.upper()

    if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
        # every option is set to None for these files
        for option_name in self.user_settings.iter_option_names():
            args[option_name] = None
    else:
        current = self.user_settings.as_dict_with_str_values()
        for key, value in current.items():
            args.setdefault(key, value)
        stored = self.user_settings.as_dict_with_str_values(
            include_query_argument=False,
            include_body_argument=self.request.path == "/einstellungen"
            and self.get_bool_argument("save_in_cookie", False),
        )
        # values that equal the ones from this second lookup
        # are set to None
        for key, value in stored.items():
            if value == args[key]:
                args[key] = None

    parts = (
        self.request.protocol,
        self.request.host,
        path,
        target.query,
        target.fragment,
    )
    result = add_args_to_url(urlunsplit(parts), **args)

    if include_protocol_and_host:
        return result
    return result.removeprefix(
        f"{self.request.protocol}://{self.request.host}"
    )
@classmethod
def get_allowed_methods(cls) -> list[str]:
    """
    Get allowed methods.

    The sorted list contains OPTIONS, everything in ALLOWED_METHODS
    and HEAD if GET is allowed and HEAD is supported.
    """
    allowed = set(cls.ALLOWED_METHODS)
    allowed.add("OPTIONS")
    if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
        allowed.add("HEAD")
    return sorted(allowed)
def get_bool_argument(
    self,
    name: str,
    default: None | bool = None,
) -> bool:
    """
    Get an argument parsed as boolean.

    Without a default the argument is required and HTTPError 400 gets
    raised if it cannot be parsed. With a default, the argument and the
    default are both passed to str_to_bool().
    """
    if default is None:
        raw = str(self.get_argument(name))
        try:
            return str_to_bool(raw)
        except ValueError as err:
            raise HTTPError(400, f"{raw} is not a boolean") from err
    return str_to_bool(self.get_argument(name, ""), default)
def get_display_scheme(self) -> ColourScheme:
    """
    Get the scheme currently displayed.

    For the scheme "random" the lowest bit of the microsecond
    of self.now decides between light and dark.
    """
    scheme = self.user_settings.scheme
    if scheme != "random":
        return scheme
    return "dark" if self.now.microsecond & 1 else "light"
def get_display_theme(self) -> str:
    """
    Get the theme currently displayed.

    The default theme becomes christmas in December. For the theme
    "random" one of THEMES except random and christmas is chosen.
    """
    theme = self.user_settings.theme

    if theme == "random":
        excluded = ("random", "christmas")
        candidates = tuple(name for name in THEMES if name not in excluded)
        return random_choice(candidates)  # nosec: B311

    if theme == "default" and self.now.month == 12:
        return "christmas"

    return theme
def get_error_message(self, **kwargs: Any) -> str:
    """
    Get the error message and return it.

    For exceptions that are not HTTP errors the traceback is returned
    if the serve_traceback setting is true (debug mode is activated)
    or the request has the traceback permission; otherwise only the
    exception itself. Missing arguments yield their log message and
    everything else the reason of the response.
    """
    if "exc_info" not in kwargs:
        return str(self._reason)

    exc_info = kwargs["exc_info"]

    if not issubclass(exc_info[0], HTTPError):
        if self.settings.get("serve_traceback") or self.is_authorized(
            Permission.TRACEBACK
        ):
            lines = traceback.format_exception(*exc_info)
        else:
            lines = traceback.format_exception_only(*exc_info[:2])
        return "".join(lines).strip()

    if issubclass(exc_info[0], MissingArgumentError):
        return cast(str, exc_info[1].log_message)

    return str(self._reason)
def get_error_page_description(self, status_code: int) -> str:
    """
    Get the description for the error page.

    Raise a ValueError for status codes outside of 100 to 599.
    """
    # https://developer.mozilla.org/docs/Web/HTTP/Status
    if status_code == 404:
        return f"{self.request.path} wurde nicht gefunden."
    if status_code == 451:
        return "Hier wäre bestimmt geiler Scheiß."

    # descriptions by the first digit of the status code
    descriptions = {
        1: "Hier gibt es eine total wichtige Information.",
        2: "Hier ist alles super! 🎶🎶",
        3: "Eine Umleitung ist eingerichtet.",
        4: "Ein Client-Fehler ist aufgetreten.",
        5: "Ein Server-Fehler ist aufgetreten.",
    }
    description = descriptions.get(status_code // 100)
    if description is None:
        raise ValueError(
            f"{status_code} is not a valid HTTP response status code."
        )
    return description
def get_int_argument(
    self,
    name: str,
    default: None | int = None,
    *,
    max_: None | int = None,
    min_: None | int = None,
) -> int:
    """
    Get an argument parsed as integer.

    The base is detected from the prefix of the value (like 0x).
    Without a default the argument is required and HTTPError 400 gets
    raised if it is not an integer. With a default, missing, empty and
    invalid values yield the default. The result is limited to max_
    and then raised to min_ if they are given.
    """
    if default is None:
        text = self.get_argument(name)
        try:
            value = int(text, base=0)
        except ValueError as err:
            raise HTTPError(400, f"{text} is not an integer") from err
    else:
        value = default
        if text := self.get_argument(name, ""):
            with contextlib.suppress(ValueError):
                value = int(text, base=0)

    if max_ is not None and value > max_:
        value = max_
    if min_ is not None and value < min_:
        value = min_

    return value
def get_module_infos(self) -> tuple[ModuleInfo, ...]:
    """Get the module infos from the settings (empty if there are none)."""
    module_infos = self.settings.get("MODULE_INFOS")
    return module_infos if module_infos else ()
def get_reporting_api_endpoint(self) -> None | str:
    """
    Get the endpoint for the Reporting API™️.

    None is returned if reporting is disabled. An endpoint starting
    with a slash is prefixed with the protocol and host of the request.
    """
    if not self.settings.get("REPORTING"):
        return None

    endpoint = self.settings.get("REPORTING_ENDPOINT")
    if endpoint and endpoint.startswith("/"):
        return f"{self.request.protocol}://{self.request.host}{endpoint}"
    return endpoint
@override
def get_template_namespace(self) -> dict[str, Any]:
    """
    Add useful things to the template namespace and return it.

    They are needed by most of the pages (like title,
    description and no_3rd_party).
    """
    namespace = super().get_template_namespace()
    ansi2html = partial(
        Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
    )
    namespace.update(self.user_settings.as_dict())
    namespace.update(
        ansi2html=partial(
            reduce, apply, (ansi2html, ansi_replace, backspace_replace)
        ),
        apm_script=(
            self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
            if self.apm_enabled
            else None
        ),
        as_html=self.content_type == "text/html",
        # true on April 1st or if the cookie "c" is set to a true value
        c=self.now.date() == date(self.now.year, 4, 1)
        or str_to_bool(self.get_cookie("c", "f") or "f", False),
        # .get() is used like in redirect_to_canonical_domain(),
        # so a missing DOMAIN setting falls back to the host of the request
        canonical_url=self.request.protocol
        + "://"
        + (self.settings.get("DOMAIN") or self.request.host)
        + self.fix_url(
            self.request.full_url().upper()
            if self.request.path.upper().startswith("/LOLWUT")
            else self.request.full_url().lower()
        )
        .split("?")[0]
        .removesuffix("/"),
        description=self.description,
        display_theme=self.get_display_theme(),
        display_scheme=self.get_display_scheme(),
        elastic_rum_url=self.ELASTIC_RUM_URL,
        fix_static=lambda path: self.fix_url(fix_static_path(path)),
        fix_url=self.fix_url,
        emoji2html=(
            emoji2html
            if self.user_settings.openmoji == "img"
            else (
                (lambda emoji: f'<span class="openmoji">{emoji}</span>')
                if self.user_settings.openmoji
                else (lambda emoji: f"<span>{emoji}</span>")
            )
        ),
        form_appendix=self.user_settings.get_form_appendix(),
        GH_ORG_URL=GH_ORG_URL,
        GH_PAGES_URL=GH_PAGES_URL,
        GH_REPO_URL=GH_REPO_URL,
        keywords="Asoziales Netzwerk, Känguru-Chroniken"
        + (
            f", {self.module_info.get_keywords_as_str(self.request.path)}"
            if self.module_info  # type: ignore[truthy-bool]
            else ""
        ),
        lang="de",  # TODO: add language support
        nonce=self.nonce,
        now=self.now,
        openmoji_version=OPENMOJI_VERSION,
        settings=self.settings,
        short_title=self.short_title,
        testing=pytest_is_running(),
        title=self.title,
    )
    namespace.update(
        {
            # true on the day easter() returns and on the day after it
            "🥚": timedelta()
            <= self.now.date() - easter(self.now.year)
            < timedelta(days=2),
            "🦘": is_prime(self.now.microsecond),
        }
    )
    return namespace
def get_user_id(self) -> str:
    """
    Get the user id saved in the cookie or create one.

    The cookie gets set again if it is missing
    or not accepted with a maximum age of 30 days.
    """
    stored = self.get_secure_cookie(
        "user_id",
        max_age_days=90,
        min_version=2,
    )
    user_id = str(uuid.uuid4()) if not stored else stored.decode("UTF-8")

    # save it in cookie or reset expiry date
    recent = self.get_secure_cookie(
        "user_id", max_age_days=30, min_version=2
    )
    if not recent:
        self.set_secure_cookie(
            "user_id",
            user_id,
            expires_days=90,
            path="/",
            samesite="Strict",
        )

    return user_id
def handle_accept_header(  # pylint: disable=inconsistent-return-statements
    self, possible_content_types: tuple[str, ...], strict: bool = True
) -> None:
    """
    Handle the Accept header and set `self.content_type`.

    Nothing happens without possible content types. If none of them
    matches the header, the request is either not acceptable (strict)
    or the first possible content type is used.
    """
    if not possible_content_types:
        return

    accepted = self.request.headers.get("Accept") or "*/*"
    best_match = get_best_match(accepted, possible_content_types)

    if best_match is None:
        if strict:
            return self.handle_not_acceptable(possible_content_types)
        best_match = possible_content_types[0]

    self.content_type = best_match
    self.set_content_type_header()
def handle_not_acceptable(
    self, possible_content_types: tuple[str, ...]
) -> None:
    """
    Only call this if we cannot respect the Accept header.

    The response gets the status 406 and lists
    the possible content types, one per line.
    """
    self.clear_header("Content-Type")
    self.set_status(406)
    listing = "\n".join(possible_content_types)
    raise Finish(listing + "\n")
def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
    """
    Handle HEAD requests.

    The request is passed to self.get() with head=True. HTTPError 405
    is raised if get() is the one from tornado.web
    and 501 if HEAD is not supported.
    """
    if self.get.__module__ == "tornado.web":
        raise HTTPError(405)
    if not self.supports_head():
        raise HTTPError(501)

    return self.get(*args, **{**kwargs, "head": True})
859 @override
860 def initialize(
861 self,
862 *,
863 module_info: ModuleInfo,
864 # default is true, because then empty args dicts are
865 # enough to specify that the defaults should be used
866 default_title: bool = True,
867 default_description: bool = True,
868 ) -> None:
869 """
870 Get title and description from the kwargs.
872 If title and description are present in the kwargs,
873 then they override self.title and self.description.
874 """
875 self.module_info = module_info
876 if not default_title:
877 page_info = self.module_info.get_page_info(self.request.path)
878 self.title = page_info.name
879 self.short_title = page_info.short_name or self.title
880 if not default_description:
881 self.description = self.module_info.get_page_info(
882 self.request.path
883 ).description
885 @override
886 async def options(self, *args: Any, **kwargs: Any) -> None:
887 """Handle OPTIONS requests."""
888 # pylint: disable=unused-argument
889 self.set_header("Allow", ", ".join(self.get_allowed_methods()))
890 self.set_status(204)
891 await self.finish()
def origin_trial(self, token: bytes | str) -> bool:
    """
    Enable an experimental feature.

    The payload is read from the decoded token after the first 69 bytes
    (presumably the signed header of the token — TODO confirm).
    The token is added as Origin-Trial header if it is not expired
    and its origin matches the URL of the request. Return whether
    the feature is active.
    """
    # pylint: disable=protected-access
    payload = json.loads(b64decode(token)[69:])
    feature = payload["feature"]

    if feature in self.active_origin_trials:
        return True

    origin = urlsplit(payload["origin"])
    url = urlsplit(self.request.full_url())
    if url.port is None and url.scheme in {"http", "https"}:
        # add the default port for the comparison with the origin
        default_port = 443 if url.scheme == "https" else 80
        url = url._replace(netloc=f"{url.hostname}:{default_port}")

    if self.request._start_time > payload["expiry"]:
        return False
    if url.scheme != origin.scheme:
        return False
    if url.netloc != origin.netloc:
        matches_subdomain = payload.get("isSubdomain") and url.netloc.endswith(
            f".{origin.netloc}"
        )
        if not matches_subdomain:
            return False

    self.add_header("Origin-Trial", token)
    self.active_origin_trials.add(feature)
    return True
@override
async def prepare(self) -> None:
    """
    Prepare the request after the parent class did.

    Disable compression if it is not allowed, handle the Accept header,
    sometimes set the cookie "c" for GET requests and raise
    HTTPError 413 if the body is bigger than MAX_BODY_SIZE.
    """
    await super().prepare()

    if self._finished:
        return

    if not self.ALLOW_COMPRESSION:
        gzip_transforms = (
            transform
            for transform in self._transforms
            if isinstance(transform, GZipContentEncoding)
        )
        for gzip_transform in gzip_transforms:
            # pylint: disable=protected-access
            gzip_transform._gzipping = False

    self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)

    method = self.request.method

    if method == "GET":
        # the random number is seeded with the time of the request
        days = Random(self.now.timestamp()).randint(0, 31337)
        if days in {69, 420, 1337, 31337}:
            self.set_cookie("c", "s", expires_days=days / 24, path="/")

    max_body_size = self.MAX_BODY_SIZE
    if method == "OPTIONS" or max_body_size is None:
        return

    body_size = len(self.request.body)
    if body_size > max_body_size:
        LOGGER.warning(
            "%s > MAX_BODY_SIZE (%s)",
            body_size,
            max_body_size,
        )
        raise HTTPError(413)
@override
def render(  # noqa: D102
    self, template_name: str, **kwargs: Any
) -> Future[None]:
    # finish() only converts the output if a template got rendered
    self.used_render = True
    future = super().render(template_name, **kwargs)
    return future
# render() has no docstring of its own; it takes the one of the parent class
render.__doc__ = _RequestHandler.render.__doc__
def set_content_type_header(self) -> None:
    """
    Set the Content-Type header based on `self.content_type`.

    Text content types get the charset UTF-8 appended.
    No header is set if the content type is None.
    """
    content_type = self.content_type
    if content_type is None:
        return

    if str(content_type).startswith("text/"):  # RFC 2616 (3.7.1)
        header_value = f"{content_type};charset=utf-8"
    else:
        header_value = content_type
    self.set_header("Content-Type", header_value)
@override
def set_cookie(  # noqa: D102  # pylint: disable=too-many-arguments
    self,
    name: str,
    value: str | bytes,
    domain: None | str = None,
    expires: None | float | tuple[int, ...] | datetime = None,
    path: str = "/",
    expires_days: None | float = 400,  # changed
    *,
    secure: bool | None = None,
    httponly: bool = True,
    **kwargs: Any,
) -> None:
    # default for same site should be strict
    kwargs.setdefault("samesite", "Strict")

    # cookies are secure by default if the request was made with HTTPS
    if secure is None:
        secure = self.request.protocol == "https"

    super().set_cookie(
        name,
        value,
        domain,
        expires,
        path,
        expires_days,
        secure=secure,
        httponly=httponly,
        **kwargs,
    )
# set_cookie() has no docstring of its own; it takes the one of the parent class
set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__
def set_csp_header(self) -> None:
    """
    Set the Content-Security-Policy header.

    A new nonce for scripts is created and saved in self.nonce.
    The report-uri directive is only added if reporting is enabled
    and there is an endpoint to report to.
    """
    self.nonce = secrets.token_urlsafe(16)

    script_src = ["'self'", f"'nonce-{self.nonce}'"]

    if (
        self.apm_enabled
        and "INLINE_SCRIPT_HASH" in self.settings["ELASTIC_APM"]
    ):
        script_src.extend(
            (
                f"'sha256-{self.settings['ELASTIC_APM']['INLINE_SCRIPT_HASH']}'",
                "'unsafe-inline'",  # for browsers that don't support hash
            )
        )

    connect_src = ["'self'"]

    if self.apm_enabled and "SERVER_URL" in self.settings["ELASTIC_APM"]:
        rum_server_url = self.settings["ELASTIC_APM"].get("RUM_SERVER_URL")
        if rum_server_url:
            # the RUM agent needs to connect to rum_server_url
            connect_src.append(rum_server_url)
        elif rum_server_url is None:
            # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
            connect_src.append(self.settings["ELASTIC_APM"]["SERVER_URL"])

    connect_src.append(  # fix for older browsers
        ("wss" if self.request.protocol == "https" else "ws")
        + f"://{self.request.host}"
    )

    report_uri = ""
    if self.settings.get("REPORTING"):
        endpoint = self.get_reporting_api_endpoint()
        # without an endpoint the directive would be "report-uri None;"
        if endpoint:
            report_uri = f"report-uri {endpoint};"

    self.set_header(
        "Content-Security-Policy",
        "default-src 'self';"
        f"script-src {' '.join(script_src)};"
        f"connect-src {' '.join(connect_src)};"
        "style-src 'self' 'unsafe-inline';"
        "img-src 'self' https://img.zeit.de https://github.asozial.org;"
        "frame-ancestors 'self';"
        "sandbox allow-downloads allow-same-origin allow-modals"
        " allow-popups-to-escape-sandbox allow-scripts allow-popups"
        " allow-top-navigation-by-user-activation allow-forms;"
        "report-to default;"
        "base-uri 'none';" + report_uri,
    )
@override
def set_default_headers(self) -> None:
    """Populate the response with the headers every reply should carry."""
    self.set_csp_header()
    self.active_origin_trials = set()

    settings = self.settings
    request = self.request

    # advertise the reporting endpoint, but only if reporting is configured
    if settings.get("REPORTING"):
        report_url = self.get_reporting_api_endpoint()
        self.set_header(
            "Reporting-Endpoints",
            f'default="{report_url}"',  # noqa: B907
        )
        report_to = {
            "group": "default",
            "max_age": 2592000,
            "endpoints": [{"url": report_url}],
        }
        self.set_header(
            "Report-To", json.dumps(report_to, option=ORJSON_OPTIONS)
        )
        self.set_header("NEL", '{"report_to":"default","max_age":2592000}')

    # headers whose value does not depend on the request
    for header_name, header_value in (
        ("X-Content-Type-Options", "nosniff"),
        ("Access-Control-Max-Age", "7200"),
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Headers", "*"),
    ):
        self.set_header(header_name, header_value)

    allowed_methods = ", ".join(self.get_allowed_methods())
    self.set_header("Access-Control-Allow-Methods", allowed_methods)
    self.set_header("Cross-Origin-Resource-Policy", "cross-origin")

    permissions_policy = (
        "browsing-topics=(),"
        "identity-credentials-get=(),"
        "join-ad-interest-group=(),"
        "private-state-token-issuance=(),"
        "private-state-token-redemption=(),"
        "run-ad-auction=()"
    )
    self.set_header("Permissions-Policy", permissions_policy)
    self.set_header("Referrer-Policy", "same-origin")
    self.set_header(
        "Cross-Origin-Opener-Policy", "same-origin; report-to=default"
    )

    # TODO: improve this
    embedder_policy = (
        "credentialless; report-to=default"
        if request.path == "/kaenguru-comics-alt"
        else "require-corp; report-to=default"
    )
    self.set_header("Cross-Origin-Embedder-Policy", embedder_policy)

    if settings.get("HSTS"):
        self.set_header("Strict-Transport-Security", "max-age=63072000")

    # point visitors at the onion service unless they already use it
    onion_address = settings.get("ONION_ADDRESS")
    if onion_address and not request.host_name.endswith(".onion"):
        query_suffix = f"?{request.query}" if request.query else ""
        self.set_header(
            "Onion-Location", onion_address + request.path + query_suffix
        )

    if settings.get("debug"):
        self.set_header("X-Debug", bool_to_str(True))
        # expose the result of every named permission check
        for perm in Permission:
            if not perm.name:
                continue
            self.set_header(
                f"X-Permission-{perm.name}",
                bool_to_str(bool(self.is_authorized(perm))),
            )

    self.set_header("Vary", "Accept, Authorization, Cookie")


set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__
@classmethod
def supports_head(cls) -> bool:
    """
    Tell whether this request handler supports HEAD requests.

    That is the case when ``get`` takes a keyword-only ``head`` parameter.
    """
    head_param = inspect.signature(cls.get).parameters.get("head")
    if head_param is None:
        return False
    return head_param.kind == inspect.Parameter.KEYWORD_ONLY
@cached_property
def user_settings(self) -> Options:
    """Return the options object built for this handler (cached)."""
    options = Options(self)
    return options
@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:  # noqa: D102
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    self.set_content_type_header()

    # dicts get serialized by the handler itself before being written
    payload = self.dump(chunk) if isinstance(chunk, dict) else chunk

    now = self.now
    if (now.month, now.day) == (4, 27):
        # only on April 27: some capitalized words are swapped for "Stanley"
        if isinstance(payload, bytes):
            # bytes that are not valid UTF-8 are passed through untouched
            with contextlib.suppress(UnicodeDecodeError):
                payload = payload.decode("UTF-8")
        if isinstance(payload, str):
            wanted = now.year % 5

            def stanleyfy(match: regex.Match[str]) -> str:
                """Replace the word if its seeded roll hits this year."""
                word = match[0]
                if Random(word).randrange(5) == wanted:
                    return "Stanley"
                return word

            payload = regex.sub(
                r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b", stanleyfy, payload
            )

    super().write(payload)


write.__doc__ = _RequestHandler.write.__doc__
@override
def write_error(self, status_code: int, **kwargs: Any) -> None:
    """Send the error response in the negotiated content type."""
    structured_types: tuple[str, str] = (
        "application/json",
        "application/yaml",
    )
    # text/plain comes first (default), so output in terminals stays readable
    supported_types: tuple[str, ...] = (
        "text/plain",
        "text/html",
        "text/markdown",
        *structured_types,
        "application/vnd.asozial.dynload+json",
    )

    if self.content_type not in supported_types:
        # never answer with 406 here, fall back to text/plain instead
        self.handle_accept_header(supported_types, strict=False)

    reason = self.get_error_message(**kwargs)

    if self.content_type == "text/html":
        description = self.get_error_page_description(status_code)
        # tracebacks are only flagged for non-HTTPError exceptions and only
        # if the server setting or the requester's permission allows it
        show_traceback = (
            "exc_info" in kwargs
            and not issubclass(kwargs["exc_info"][0], HTTPError)
            and (
                self.settings.get("serve_traceback")
                or self.is_authorized(Permission.TRACEBACK)
            )
        )
        self.render(  # type: ignore[unused-awaitable]
            "error.html",
            status=status_code,
            reason=reason,
            description=description,
            is_traceback=show_traceback,
        )
    elif self.content_type in structured_types:
        self.finish(  # type: ignore[unused-awaitable]
            {
                "status": status_code,
                "reason": reason,
            }
        )
    else:
        self.finish(  # type: ignore[unused-awaitable]
            f"{status_code} {reason}\n"
        )


write_error.__doc__ = _RequestHandler.write_error.__doc__