Coverage for an_website / utils / base_request_handler.py: 78.905%
493 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 17:35 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 17:35 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
13# pylint: disable=too-many-lines
15"""
16The base request handler used by other modules.
18This should only contain the BaseRequestHandler class.
19"""
21import contextlib
22import inspect
23import logging
24import secrets
25import sys
26import traceback
27import uuid
28from asyncio import Future
29from base64 import b64decode
30from collections.abc import Awaitable, Callable, Coroutine, Mapping
31from contextvars import ContextVar
32from datetime import date, datetime, timedelta, timezone, tzinfo
33from functools import cached_property, partial, reduce
34from random import Random, choice as random_choice
35from types import TracebackType
36from typing import Any, ClassVar, Final, cast, override
37from urllib.parse import SplitResult, urlsplit, urlunsplit
38from zoneinfo import ZoneInfo
40import elasticapm
41import html2text
42import orjson as json
43import regex
44import tornado.web
45import yaml
46from accept_types import get_best_match # type: ignore[import-untyped]
47from ansi2html import Ansi2HTMLConverter
48from bs4 import BeautifulSoup
49from dateutil.easter import easter
50from elastic_transport import ApiError, TransportError
51from elasticsearch import AsyncElasticsearch
52from openmoji_dist import VERSION as OPENMOJI_VERSION
53from redis.asyncio import Redis
54from tornado.httputil import HTTPServerRequest
55from tornado.iostream import StreamClosedError
56from tornado.web import (
57 Finish,
58 GZipContentEncoding,
59 HTTPError,
60 MissingArgumentError,
61 OutputTransform,
62)
64from .. import (
65 EVENT_ELASTICSEARCH,
66 EVENT_REDIS,
67 GH_ORG_URL,
68 GH_PAGES_URL,
69 GH_REPO_URL,
70 NAME,
71 ORJSON_OPTIONS,
72 pytest_is_running,
73)
74from .decorators import is_authorized
75from .options import ColourScheme, Options
76from .static_file_handling import FILE_HASHES_DICT, fix_static_path
77from .themes import THEMES
78from .utils import (
79 ModuleInfo,
80 Permission,
81 add_args_to_url,
82 ansi_replace,
83 apply,
84 backspace_replace,
85 bool_to_str,
86 emoji2html,
87 geoip,
88 hash_bytes,
89 is_prime,
90 ratelimit,
91 str_to_bool,
92)
LOGGER: Final = logging.getLogger(__name__)

# Content types outside of "text/*" whose bodies are textual nonetheless.
# BaseRequestHandler._finish() terminates such bodies with a newline.
TEXT_CONTENT_TYPES: Final[set[str]] = {
    "application/json",
    "application/vnd.asozial.dynload+json",
    "application/x-ndjson",
    "application/yaml",
    "application/xml",
    "application/javascript",
}

# NOTE(review): presumably the possible values of the X-Clacks-Overhead
# header; the code that uses this tuple is not part of this section.
CLACKS_OVERHEADS = (
    "GNU Aaron Swartz",
    "GNU Carol Angie Deborah Maltesi",
    "GNU Charlotte Angie",
    "GNU Terry Pratchett",
)
# The request that is currently being handled.
# It gets set at the start of _RequestHandler._execute().
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar(
    "current_request"
)
class _RequestHandler(tornado.web.RequestHandler):
    """
    Base for Tornado request handlers.

    It provides access to the Elasticsearch, Redis and APM clients stored
    in the application settings, the (GeoIP based) local start time of the
    request, rate limiting and the redirect to the canonical domain.
    """

    # whether the User-Agent of the request contains the CRAWLER_SECRET
    crawler: bool = False

    @override
    async def _execute(
        self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
    ) -> None:
        # make the request available through the context variable
        request_ctx_var.set(self.request)

        # fill the `now` cached property before any handler code reads it
        self.now = await self.get_time()

        return await super()._execute(transforms, *args, **kwargs)

    # pylint: disable-next=protected-access
    _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__

    @property
    def apm_client(self) -> None | elasticapm.Client:
        """Get the APM client from the settings."""
        return self.settings.get("ELASTIC_APM", {}).get("CLIENT")  # type: ignore[no-any-return]

    @property
    def apm_enabled(self) -> bool:
        """Return whether APM is enabled."""
        return bool(self.settings.get("ELASTIC_APM", {}).get("ENABLED"))

    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        pass

    data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__

    @property
    def elasticsearch(self) -> AsyncElasticsearch:
        """
        Get the Elasticsearch client from the settings.

        This is None if Elasticsearch is not enabled.
        """
        return cast(AsyncElasticsearch, self.settings.get("ELASTICSEARCH"))

    @property
    def elasticsearch_prefix(self) -> str:
        """Get the Elasticsearch prefix from the settings."""
        return self.settings.get(  # type: ignore[no-any-return]
            "ELASTICSEARCH_PREFIX", NAME
        )

    def geoip(
        self,
        ip: None | str = None,
        database: str = geoip.__defaults__[0],  # type: ignore[index]
        *,
        allow_fallback: bool = True,
    ) -> Coroutine[None, None, None | dict[str, Any]]:
        """
        Get GeoIP information.

        The IP address defaults to the remote IP of the request.
        Elasticsearch is only passed to the lookup if it is available.
        """
        if not ip:
            ip = self.request.remote_ip
        if not EVENT_ELASTICSEARCH.is_set():
            return geoip(ip, database)
        return geoip(
            ip, database, self.elasticsearch, allow_fallback=allow_fallback
        )

    async def get_time(self) -> datetime:
        """
        Get the start time of the request in the users' timezone.

        UTC is used if the GeoIP lookup fails or yields no timezone.
        """
        tz: tzinfo = timezone.utc
        try:
            geoip = await self.geoip()  # pylint: disable=redefined-outer-name
        except (ApiError, TransportError):
            # the timezone is only nice to have, so stay with UTC
            LOGGER.exception("Elasticsearch request failed")
            if self.apm_client:
                self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
        else:
            if geoip and "timezone" in geoip:
                tz = ZoneInfo(geoip["timezone"])
        return datetime.fromtimestamp(
            self.request._start_time, tz=tz  # pylint: disable=protected-access
        )

    def is_authorized(
        self, permission: Permission, allow_cookie_auth: bool = True
    ) -> bool | None:
        """Check whether the request is authorized."""
        return is_authorized(self, permission, allow_cookie_auth)

    @override
    def log_exception(
        self,
        typ: None | type[BaseException],
        value: None | BaseException,
        tb: None | TracebackType,
    ) -> None:
        if isinstance(value, HTTPError):
            # let Tornado log HTTP errors the way it always does
            super().log_exception(typ, value, tb)
        elif typ is StreamClosedError:
            # a client that went away is not worth an error entry
            LOGGER.debug(
                "Stream closed %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )
        else:
            LOGGER.error(
                "Uncaught exception %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )

    log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__

    @cached_property
    def now(self) -> datetime:
        """
        Get the start time of the request.

        Normally `_execute` assigns this attribute before it is read.
        This fallback only runs if it is read earlier; it fails in tests
        and otherwise logs an error and returns the time in UTC.
        """
        # pylint: disable=method-hidden
        if pytest_is_running():
            raise AssertionError("Now accessed before it was set")
        # if self.request.method in self.SUPPORTED_METHODS:  # Why?
        LOGGER.error("Now accessed before it was set", stacklevel=3)
        return self.now_utc

    @cached_property
    def now_utc(self) -> datetime:
        """Get the start time of the request in UTC."""
        return datetime.fromtimestamp(
            self.request._start_time,  # pylint: disable=protected-access
            tz=timezone.utc,
        )

    @override  # pylint: disable-next=invalid-overridden-method
    async def prepare(self) -> None:
        """
        Detect crawlers, redirect to the canonical domain and ratelimit.

        The handler specific ratelimit is only applied if the global
        ratelimit did not already reject the request.
        """
        if crawler_secret := self.settings.get("CRAWLER_SECRET"):
            self.crawler = crawler_secret in self.request.headers.get(
                "User-Agent", ""
            )

        if (
            self.request.method in {"GET", "HEAD"}
            and self.redirect_to_canonical_domain()
        ):
            return

        if self.request.method != "OPTIONS" and not await self.ratelimit(True):
            await self.ratelimit()

    async def ratelimit(self, global_ratelimit: bool = False) -> bool:
        """
        Take b1nzy to space using Redis.

        Return whether the request got ratelimited. In that case the
        error response (420 on April 20th, 429 otherwise) was already
        written. Raise a 503 error if Redis is not available.
        """
        if (
            not self.settings.get("RATELIMITS")
            or self.request.method == "OPTIONS"
            or self.is_authorized(Permission.RATELIMITS)
            or self.crawler
        ):
            return False

        if not EVENT_REDIS.is_set():
            LOGGER.warning(
                (
                    "Ratelimits are enabled, but Redis is not available. "
                    "This can happen shortly after starting the website."
                ),
            )
            raise HTTPError(503)

        if global_ratelimit:
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=None,
                max_burst=99,  # limit = 100
                count_per_period=20,  # 20 requests per second
                period=1,
                tokens=10 if self.settings.get("UNDER_ATTACK") else 1,
            )
        else:
            # HEAD requests share the limits of GET requests
            method = (
                "GET" if self.request.method == "HEAD" else self.request.method
            )
            if not (limit := getattr(self, f"RATELIMIT_{method}_LIMIT", 0)):
                return False
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=getattr(
                    self,
                    f"RATELIMIT_{method}_BUCKET",
                    self.__class__.__name__.lower(),
                ),
                max_burst=limit - 1,
                count_per_period=getattr(  # request count per period
                    self,
                    f"RATELIMIT_{method}_COUNT_PER_PERIOD",
                    30,
                ),
                period=getattr(
                    self, f"RATELIMIT_{method}_PERIOD", 60  # period in seconds
                ),
                # HEAD requests do not consume tokens
                tokens=1 if self.request.method != "HEAD" else 0,
            )

        for header, value in headers.items():
            self.set_header(header, value)

        if ratelimited:
            if self.now.date() == date(self.now.year, 4, 20):
                self.set_status(420)
                self.write_error(420)
            else:
                self.set_status(429)
                self.write_error(429)

        return ratelimited

    def redirect_to_canonical_domain(self) -> bool:
        """
        Redirect to the canonical domain.

        Return whether a redirect was sent. Nothing happens if no domain
        is configured, the Host header is missing, the request already
        uses the canonical domain or a .onion/.i2p host, or the path only
        consists of Braille pattern characters.
        """
        if (
            not (domain := self.settings.get("DOMAIN"))
            or not self.request.headers.get("Host")
            or self.request.host_name == domain
            or self.request.host_name.endswith((".onion", ".i2p"))
            or regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path)
        ):
            return False
        try:
            port = urlsplit(f"//{self.request.headers['Host']}").port
        except ValueError:
            # The Host header is controlled by the client. A port that is
            # not numeric or out of range must not crash the request, so
            # redirect to the canonical domain without a port instead.
            port = None
        self.redirect(
            urlsplit(self.request.full_url())
            ._replace(netloc=f"{domain}:{port}" if port else domain)
            .geturl(),
            permanent=True,
        )
        return True

    @property
    def redis(self) -> Redis[str]:
        """
        Get the Redis client from the settings.

        This is None if Redis is not enabled.
        """
        return cast("Redis[str]", self.settings.get("REDIS"))

    @property
    def redis_prefix(self) -> str:
        """Get the Redis prefix from the settings."""
        return self.settings.get(  # type: ignore[no-any-return]
            "REDIS_PREFIX", NAME
        )
370class BaseRequestHandler(_RequestHandler):
371 """The base request handler used by every page and API."""
# pylint: disable=too-many-instance-attributes, too-many-public-methods

# URL of the Elastic RUM agent script (not minified in dev mode)
ELASTIC_RUM_URL: ClassVar[str] = (
    f"/@apm-rum/elastic-apm-rum.umd{'' if sys.flags.dev_mode else '.min'}.js"
    "?v=5.12.0"
)

# behaviour switches that can be overridden by subclasses
COMPUTE_ETAG: ClassVar[bool] = True
ALLOW_COMPRESSION: ClassVar[bool] = True
MAX_BODY_SIZE: ClassVar[None | int] = None
ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()

module_info: ModuleInfo
# info about page, can be overridden in module_info
title: str = "Das Asoziale Netzwerk"
short_title: str = "Asoziales Netzwerk"
description: str = "Die tolle Webseite des Asozialen Netzwerks"

# whether render() was called for this request
used_render: bool = False

active_origin_trials: set[str]
# the negotiated content type of the response
content_type: None | str = None
apm_script: None | str
# the nonce used in the Content-Security-Policy header
nonce: str
def _finish(
    self, chunk: None | str | bytes | dict[str, Any] = None
) -> Future[None]:
    """
    Write the last chunk and finish the response.

    Responses with a textual content type always end with a newline.
    """
    if self._finished:
        raise RuntimeError("finish() called twice")

    if chunk is not None:
        self.write(chunk)

    mime = self.content_type
    if mime and (
        mime.startswith("text/")
        or mime.endswith(("+xml", "+json"))
        or mime in TEXT_CONTENT_TYPES
    ):
        buffer = self._write_buffer
        # only add the newline if there is a body that lacks one
        if buffer and not buffer[-1].endswith(b"\n"):
            self.write(b"\n")

    return super().finish()
422 @override
423 def compute_etag(self) -> None | str:
424 """Compute ETag with Base85 encoding."""
425 if not self.COMPUTE_ETAG:
426 return None
427 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907
429 @override
430 def decode_argument( # noqa: D102
431 self, value: bytes, name: str | None = None
432 ) -> str:
433 try:
434 return value.decode("UTF-8", "replace")
435 except UnicodeDecodeError as exc:
436 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}"
437 LOGGER.exception(err_msg, exc_info=exc)
438 raise HTTPError(400, err_msg) from exc
440 @property
441 def dump(self) -> Callable[[Any], str | bytes]:
442 """Get the function for dumping the output."""
443 yaml_subset = self.content_type in {
444 "application/json",
445 "application/vnd.asozial.dynload+json",
446 }
448 if self.content_type == "application/yaml":
449 if self.now.timetuple()[2:0:-1] == (1, 4):
450 yaml_subset = True
451 else:
452 return lambda spam: yaml.dump(
453 spam,
454 width=self.get_int_argument("yaml_width", 80, min_=80),
455 )
457 if yaml_subset:
458 option = ORJSON_OPTIONS
459 if self.get_bool_argument("pretty", False):
460 option |= json.OPT_INDENT_2
461 return lambda spam: json.dumps(spam, option=option)
463 return lambda spam: spam
@override
def finish(  # noqa: D102
    self, chunk: None | str | bytes | dict[Any, Any] = None
) -> Future[None]:
    # Rendered HTML gets converted if the negotiated content type is
    # Markdown, plain text or the dynload JSON format.
    mime = self.content_type
    wants_dynload = mime == "application/vnd.asozial.dynload+json"
    wants_text = mime == "text/plain"
    wants_markdown = mime == "text/markdown"

    needs_conversion = (
        isinstance(chunk, bytes | str)
        and mime != "text/html"
        and self.used_render
        and (wants_dynload or wants_text or wants_markdown)
    )
    if not needs_conversion:
        return self._finish(chunk)

    html = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk

    if wants_markdown:
        heading = f"# {self.title}\n\n"
        markdown = html2text.html2text(html, self.request.full_url())
        return self._finish(heading + markdown.strip())

    soup = BeautifulSoup(html, features="lxml")

    if wants_text:
        return self._finish(soup.get_text("\n", True))

    # dynload: the parts of the page a client needs to swap the content
    dictionary: dict[str, object] = {
        "url": self.fix_url(include_protocol_and_host=True),
        "title": self.title,
        "short_title": (
            None if self.title == self.short_title else self.short_title
        ),
    }
    main_element = soup.find_all(name="main")[0]
    dictionary["body"] = "".join(map(str, main_element.contents)).strip()
    dictionary["scripts"] = [
        {"script": script.string} | script.attrs
        for script in soup.find_all("script")
    ]
    dictionary["stylesheets"] = [
        link.get("href").strip()
        for link in soup.find_all("link", rel="stylesheet")
    ]
    dictionary["css"] = "\n".join(
        style.string for style in soup.find_all("style")
    )

    return self._finish(dictionary)

finish.__doc__ = _RequestHandler.finish.__doc__
def finish_dict(self, **kwargs: Any) -> Future[None]:
    """Finish the request with the keyword arguments as dictionary."""
    dictionary: dict[str, Any] = kwargs
    return self.finish(dictionary)
def fix_url(
    self,
    url: None | str | SplitResult = None,
    new_path: None | str = None,
    include_protocol_and_host: bool | str = False,
    query_args: Mapping[str, None | str | bool | float] | None = None,
) -> str:
    """
    Fix a URL and return it.

    URLs of other websites get linked through the redirect page, if the
    user wants to be asked before leaving and the redirect module is
    loaded. URLs of this website get the user settings added as query
    arguments; settings that are stored elsewhere with the same value
    and all settings of static files are set to None instead.
    """
    args = dict(query_args or {})
    if url is None:
        url = self.request.full_url()
    target = urlsplit(url) if isinstance(url, str) else url

    if target.netloc and target.netloc.lower() != self.request.host.lower():
        if not (
            self.user_settings.ask_before_leaving
            and self.settings.get("REDIRECT_MODULE_LOADED")
        ):
            return target.geturl()
        args["to"] = target.geturl()
        target = urlsplit(self.request.full_url())
        path = "/redirect"
    else:
        path = new_path if new_path is not None else target.path

    path = f"/{path.strip('/')}".lower()
    if path == "/lolwut":
        path = path.upper()

    if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
        for option_name in self.user_settings.iter_option_names():
            args[option_name] = None
    else:
        current = self.user_settings.as_dict_with_str_values()
        for key, value in current.items():
            args.setdefault(key, value)
        stored = self.user_settings.as_dict_with_str_values(
            include_query_argument=False,
            include_body_argument=self.request.path == "/einstellungen"
            and self.get_bool_argument("save_in_cookie", False),
        )
        for key, value in stored.items():
            if value == args[key]:
                # same as the stored value, so it is not needed in the URL
                args[key] = None

    absolute = add_args_to_url(
        urlunsplit(
            (
                self.request.protocol,
                self.request.host,
                path,
                target.query,
                target.fragment,
            )
        ),
        **args,
    )

    if include_protocol_and_host:
        return absolute
    return absolute.removeprefix(
        f"{self.request.protocol}://{self.request.host}"
    )
@classmethod
def get_allowed_methods(cls) -> list[str]:
    """Get the sorted allowed methods, including OPTIONS and maybe HEAD."""
    allowed = set(cls.ALLOWED_METHODS)
    allowed.add("OPTIONS")
    if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
        allowed.add("HEAD")
    return sorted(allowed)
def get_bool_argument(
    self,
    name: str,
    default: None | bool = None,
) -> bool:
    """
    Get an argument parsed as boolean.

    Without a default the argument is required and an unparsable value
    leads to a 400 error.
    """
    if default is None:
        raw = str(self.get_argument(name))
        try:
            return str_to_bool(raw)
        except ValueError as err:
            raise HTTPError(400, f"{raw} is not a boolean") from err
    return str_to_bool(self.get_argument(name, ""), default)
def get_display_scheme(self) -> ColourScheme:
    """Get the scheme currently displayed."""
    scheme = self.user_settings.scheme
    if scheme != "random":
        return scheme
    # the parity of the microsecond decides
    return "dark" if self.now.microsecond & 1 else "light"
def get_display_theme(self) -> str:
    """
    Get the theme currently displayed.

    The default theme becomes the christmas theme in December.
    """
    theme = self.user_settings.theme

    if theme == "default" and self.now.month == 12:
        return "christmas"

    if theme == "random":
        excluded = ("random", "christmas")
        candidates = tuple(name for name in THEMES if name not in excluded)
        return random_choice(candidates)  # nosec: B311

    return theme
def get_error_message(self, **kwargs: Any) -> str:
    """
    Get the error message and return it.

    For exceptions that are no HTTP errors the traceback gets returned
    if the serve_traceback setting is true (debug mode is activated) or
    the request is authorized for it, otherwise only the exception.
    """
    if "exc_info" not in kwargs:
        return str(self._reason)

    exc_info = kwargs["exc_info"]
    exc_type = exc_info[0]

    if not issubclass(exc_type, HTTPError):
        if self.settings.get("serve_traceback") or self.is_authorized(
            Permission.TRACEBACK
        ):
            lines = traceback.format_exception(*exc_info)
        else:
            lines = traceback.format_exception_only(*exc_info[:2])
        return "".join(lines).strip()

    if issubclass(exc_type, MissingArgumentError):
        return cast(str, exc_info[1].log_message)

    return str(self._reason)
def get_error_page_description(self, status_code: int) -> str:
    """
    Get the description for the error page.

    Raise a ValueError for status codes outside of 100 to 599.
    """
    # https://developer.mozilla.org/docs/Web/HTTP/Status
    if status_code == 404:
        return f"{self.request.path} wurde nicht gefunden."
    if status_code == 451:
        return "Hier wäre bestimmt geiler Scheiß."

    descriptions = (
        (100, "Hier gibt es eine total wichtige Information."),
        (200, "Hier ist alles super! 🎶🎶"),
        (300, "Eine Umleitung ist eingerichtet."),
        (400, "Ein Client-Fehler ist aufgetreten."),
        (500, "Ein Server-Fehler ist aufgetreten."),
    )
    for first_code, description in descriptions:
        if first_code <= status_code <= first_code + 99:
            return description

    raise ValueError(
        f"{status_code} is not a valid HTTP response status code."
    )
687 def get_int_argument(
688 self,
689 name: str,
690 default: None | int = None,
691 *,
692 max_: None | int = None,
693 min_: None | int = None,
694 ) -> int:
695 """Get an argument parsed as integer."""
696 if default is None:
697 str_value = self.get_argument(name)
698 try:
699 value = int(str_value, base=0)
700 except ValueError as err:
701 raise HTTPError(400, f"{str_value} is not an integer") from err
702 elif self.get_argument(name, ""):
703 try:
704 value = int(self.get_argument(name), base=0)
705 except ValueError:
706 value = default
707 else:
708 value = default
710 if max_ is not None:
711 value = min(max_, value)
712 if min_ is not None:
713 value = max(min_, value)
715 return value
def get_module_infos(self) -> tuple[ModuleInfo, ...]:
    """Get the module infos from the settings (empty if there are none)."""
    module_infos = self.settings.get("MODULE_INFOS")
    return module_infos if module_infos else ()
721 def get_reporting_api_endpoint(self) -> None | str:
722 """Get the endpoint for the Reporting API™️."""
723 if not self.settings.get("REPORTING"):
724 return None
725 endpoint = self.settings.get("REPORTING_ENDPOINT")
727 if not endpoint or not endpoint.startswith("/"):
728 return endpoint
730 return f"{self.request.protocol}://{self.request.host}{endpoint}"
@override
def get_template_namespace(self) -> dict[str, Any]:
    """
    Add useful things to the template namespace and return it.

    They are needed by most of the pages (like title, description,
    the user settings and helpers for fixing URLs).
    """
    namespace = super().get_template_namespace()
    ansi2html = partial(
        Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
    )
    namespace.update(self.user_settings.as_dict())
    namespace.update(
        # applies ansi2html, ansi_replace and backspace_replace in order
        ansi2html=partial(
            reduce, apply, (ansi2html, ansi_replace, backspace_replace)
        ),
        apm_script=(
            self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
            if self.apm_enabled
            else None
        ),
        as_html=self.content_type == "text/html",
        # true on April 1st or if the "c" cookie is set to a true value
        c=self.now.date() == date(self.now.year, 4, 1)
        or str_to_bool(self.get_cookie("c", "f") or "f", False),
        canonical_url=self.request.protocol
        + "://"
        # .get() like in redirect_to_canonical_domain(), so that the
        # fallback to the request host also works without the setting
        + (self.settings.get("DOMAIN") or self.request.host)
        + self.fix_url(
            self.request.full_url().upper()
            if self.request.path.upper().startswith("/LOLWUT")
            else self.request.full_url().lower()
        )
        .split("?")[0]
        .removesuffix("/"),
        description=self.description,
        display_theme=self.get_display_theme(),
        display_scheme=self.get_display_scheme(),
        elastic_rum_url=self.ELASTIC_RUM_URL,
        fix_static=lambda path: self.fix_url(fix_static_path(path)),
        fix_url=self.fix_url,
        emoji2html=(
            emoji2html
            if self.user_settings.openmoji == "img"
            else (
                (lambda emoji: f'<span class="openmoji">{emoji}</span>')
                if self.user_settings.openmoji
                else (lambda emoji: f"<span>{emoji}</span>")
            )
        ),
        form_appendix=self.user_settings.get_form_appendix(),
        GH_ORG_URL=GH_ORG_URL,
        GH_PAGES_URL=GH_PAGES_URL,
        GH_REPO_URL=GH_REPO_URL,
        keywords="Asoziales Netzwerk, Känguru-Chroniken"
        + (
            f", {self.module_info.get_keywords_as_str(self.request.path)}"
            if self.module_info  # type: ignore[truthy-bool]
            else ""
        ),
        lang="de",  # TODO: add language support
        nonce=self.nonce,
        now=self.now,
        openmoji_version=OPENMOJI_VERSION,
        settings=self.settings,
        short_title=self.short_title,
        testing=pytest_is_running(),
        title=self.title,
    )
    namespace.update(
        {
            # true on Easter Sunday and Easter Monday
            "🥚": timedelta()
            <= self.now.date() - easter(self.now.year)
            < timedelta(days=2),
            "🦘": is_prime(self.now.microsecond),
        }
    )
    return namespace
def get_user_id(self) -> str:
    """
    Get the user id saved in the cookie or create one.

    The cookie gets (re)written if it is missing or older than 30 days.
    """
    stored = self.get_secure_cookie(
        "user_id",
        max_age_days=90,
        min_version=2,
    )
    user_id = str(uuid.uuid4()) if not stored else stored.decode("UTF-8")

    # save it in cookie or reset expiry date
    recent = self.get_secure_cookie(
        "user_id", max_age_days=30, min_version=2
    )
    if not recent:
        self.set_secure_cookie(
            "user_id",
            user_id,
            expires_days=90,
            path="/",
            samesite="Strict",
        )

    return user_id
def handle_accept_header(  # pylint: disable=inconsistent-return-statements
    self, possible_content_types: tuple[str, ...], strict: bool = True
) -> None:
    """
    Handle the Accept header and set `self.content_type`.

    If nothing matches, the request is either rejected (strict) or the
    first possible content type is used.
    """
    if not possible_content_types:
        return

    accept = self.request.headers.get("Accept") or "*/*"
    best_match = get_best_match(accept, possible_content_types)

    if best_match is None:
        if strict:
            return self.handle_not_acceptable(possible_content_types)
        best_match = possible_content_types[0]

    self.content_type = best_match
    self.set_content_type_header()
def handle_not_acceptable(
    self, possible_content_types: tuple[str, ...]
) -> None:
    """
    Only call this if we cannot respect the Accept header.

    Finish the request with status 406 and the possible content types,
    one per line, as body.
    """
    self.clear_header("Content-Type")
    self.set_status(406)
    listing = "\n".join(possible_content_types)
    raise Finish(listing + "\n")
859 def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
860 """Handle HEAD requests."""
861 if self.get.__module__ == "tornado.web":
862 raise HTTPError(405)
863 if not self.supports_head():
864 raise HTTPError(501)
866 kwargs["head"] = True
867 return self.get(*args, **kwargs)
@override
def initialize(
    self,
    *,
    module_info: ModuleInfo,
    # default is true, because then empty args dicts are
    # enough to specify that the defaults should be used
    default_title: bool = True,
    default_description: bool = True,
) -> None:
    """
    Save the module info and maybe set title and description.

    If default_title or default_description are false, the values from
    the page info of the requested path override self.title,
    self.short_title and self.description.
    """
    self.module_info = module_info

    if not default_title:
        page_info = module_info.get_page_info(self.request.path)
        self.title = page_info.name
        self.short_title = page_info.short_name or self.title

    if not default_description:
        page_info = module_info.get_page_info(self.request.path)
        self.description = page_info.description
895 @override
896 async def options(self, *args: Any, **kwargs: Any) -> None:
897 """Handle OPTIONS requests."""
898 # pylint: disable=unused-argument
899 self.set_header("Allow", ", ".join(self.get_allowed_methods()))
900 self.set_status(204)
901 await self.finish()
903 def origin_trial(self, token: bytes | str) -> bool:
904 """Enable an experimental feature."""
905 # pylint: disable=protected-access
906 payload = json.loads(b64decode(token)[69:])
907 if payload["feature"] in self.active_origin_trials:
908 return True
909 origin = urlsplit(payload["origin"])
910 url = urlsplit(self.request.full_url())
911 if url.port is None and url.scheme in {"http", "https"}:
912 url = url._replace(
913 netloc=f"{url.hostname}:{443 if url.scheme == 'https' else 80}"
914 )
915 if self.request._start_time > payload["expiry"]:
916 return False
917 if url.scheme != origin.scheme:
918 return False
919 if url.netloc != origin.netloc and not (
920 payload.get("isSubdomain")
921 and url.netloc.endswith(f".{origin.netloc}")
922 ):
923 return False
924 self.add_header("Origin-Trial", token)
925 self.active_origin_trials.add(payload["feature"])
926 return True
@override
async def prepare(self) -> None:
    """
    Prepare the request after the checks of the parent class.

    Disable compression if it is not allowed, handle the Accept header,
    rarely set the "c" cookie on GET requests and reject bodies that
    are bigger than MAX_BODY_SIZE with a 413 error.
    """
    await super().prepare()

    if self._finished:
        return

    if not self.ALLOW_COMPRESSION:
        gzip_transforms = (
            transform
            for transform in self._transforms
            if isinstance(transform, GZipContentEncoding)
        )
        for transform in gzip_transforms:
            # pylint: disable=protected-access
            transform._gzipping = False

    self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)

    special_numbers = {69, 420, 1337, 31337}
    if (
        self.request.method == "GET"
        and (days := Random(self.now.timestamp()).randint(0, 31337))
        in special_numbers
    ):
        self.set_cookie("c", "s", expires_days=days / 24, path="/")

    max_body_size = self.MAX_BODY_SIZE
    if (
        self.request.method != "OPTIONS"
        and max_body_size is not None
        and len(self.request.body) > max_body_size
    ):
        LOGGER.warning(
            "%s > MAX_BODY_SIZE (%s)",
            len(self.request.body),
            self.MAX_BODY_SIZE,
        )
        raise HTTPError(413)
@override
def render(  # noqa: D102
    self, template_name: str, **kwargs: Any
) -> Future[None]:
    # finish() only converts the output if a template was rendered
    self.used_render = True
    future = super().render(template_name, **kwargs)
    return future

render.__doc__ = _RequestHandler.render.__doc__
def set_content_type_header(self) -> None:
    """
    Set the Content-Type header based on `self.content_type`.

    Nothing is set if there is no content type.
    """
    content_type = self.content_type
    if content_type is None:
        return

    if str(content_type).startswith("text/"):  # RFC 2616 (3.7.1)
        header_value = f"{content_type};charset=utf-8"
    else:
        header_value = content_type

    self.set_header("Content-Type", header_value)
@override
def set_cookie(  # noqa: D102  # pylint: disable=too-many-arguments
    self,
    name: str,
    value: str | bytes,
    domain: None | str = None,
    expires: None | float | tuple[int, ...] | datetime = None,
    path: str = "/",
    expires_days: None | float = 400,  # changed
    *,
    secure: bool | None = None,
    httponly: bool = True,
    **kwargs: Any,
) -> None:
    # default for same site should be strict
    kwargs.setdefault("samesite", "Strict")

    if secure is None:
        # only mark the cookie as secure on HTTPS requests
        secure = self.request.protocol == "https"

    super().set_cookie(
        name,
        value,
        domain,
        expires,
        path,
        expires_days,
        secure=secure,
        httponly=httponly,
        **kwargs,
    )

set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__
def set_csp_header(self) -> None:
    """
    Set the Content-Security-Policy header.

    A new nonce for scripts is created and saved in `self.nonce`.
    If APM is enabled, the inline script hash and the server the RUM
    agent needs to connect to are allowed. The report-uri directive is
    only added if a reporting endpoint is available.
    """
    self.nonce = secrets.token_urlsafe(16)

    script_src = ["'self'", f"'nonce-{self.nonce}'"]

    if (
        self.apm_enabled
        and "INLINE_SCRIPT_HASH" in self.settings["ELASTIC_APM"]
    ):
        script_src.extend(
            (
                f"'sha256-{self.settings['ELASTIC_APM']['INLINE_SCRIPT_HASH']}'",
                "'unsafe-inline'",  # for browsers that don't support hash
            )
        )

    connect_src = ["'self'"]

    if self.apm_enabled and "SERVER_URL" in self.settings["ELASTIC_APM"]:
        rum_server_url = self.settings["ELASTIC_APM"].get("RUM_SERVER_URL")
        if rum_server_url:
            # the RUM agent needs to connect to rum_server_url
            connect_src.append(rum_server_url)
        elif rum_server_url is None:
            # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
            connect_src.append(self.settings["ELASTIC_APM"]["SERVER_URL"])

    connect_src.append(  # fix for older browsers
        ("wss" if self.request.protocol == "https" else "ws")
        + f"://{self.request.host}"
    )

    # This is None if reporting is disabled and may be None or empty if
    # no endpoint is configured. "report-uri None;" must not be sent.
    report_uri = self.get_reporting_api_endpoint()

    self.set_header(
        "Content-Security-Policy",
        "default-src 'self';"
        f"script-src {' '.join(script_src)};"
        f"connect-src {' '.join(connect_src)};"
        "style-src 'self' 'unsafe-inline';"
        "img-src 'self' https://img.zeit.de https://github.asozial.org;"
        "frame-ancestors 'self';"
        "sandbox allow-downloads allow-same-origin allow-modals"
        " allow-popups-to-escape-sandbox allow-scripts allow-popups"
        " allow-top-navigation-by-user-activation allow-forms;"
        "report-to default;"
        "base-uri 'none';"
        + (f"report-uri {report_uri};" if report_uri else ""),
    )
@override
def set_default_headers(self) -> None:
    """
    Set the headers that are sent with every response.

    This sets the CSP header, resets ``self.active_origin_trials`` and
    then adds the reporting, CORS, isolation, transport, debug and
    client hint headers.

    The statement after this method replaces this docstring with
    the one of ``_RequestHandler.set_default_headers``.
    """
    self.set_csp_header()
    self.active_origin_trials = set()

    config = self.settings
    req = self.request
    emit = self.set_header

    # reporting headers are only sent when reporting is configured
    if config.get("REPORTING"):
        reports_url = self.get_reporting_api_endpoint()
        report_to = {
            "group": "default",
            "max_age": 2592000,
            "endpoints": [{"url": reports_url}],
        }
        emit("Reporting-Endpoints", f'default="{reports_url}"')  # noqa: B907
        emit("Report-To", json.dumps(report_to, option=ORJSON_OPTIONS))
        emit("NEL", '{"report_to":"default","max_age":2592000}')

    emit("X-Content-Type-Options", "nosniff")

    # CORS
    emit("Access-Control-Max-Age", "7200")
    emit("Access-Control-Allow-Origin", "*")
    emit("Access-Control-Allow-Headers", "*")
    allowed_methods = ", ".join(self.get_allowed_methods())
    emit("Access-Control-Allow-Methods", allowed_methods)
    emit("Cross-Origin-Resource-Policy", "cross-origin")

    emit(
        "Permissions-Policy",
        "browsing-topics=(),"
        "identity-credentials-get=(),"
        "join-ad-interest-group=(),"
        "private-state-token-issuance=(),"
        "private-state-token-redemption=(),"
        "run-ad-auction=()",
    )
    emit("Referrer-Policy", "same-origin")
    emit("Cross-Origin-Opener-Policy", "same-origin;report-to=default")

    embedder_policy = (
        "credentialless;report-to=default"
        if req.path == "/kaenguru-comics-alt"  # TODO: improve this
        else "require-corp;report-to=default"
    )
    emit("Cross-Origin-Embedder-Policy", embedder_policy)

    if config.get("HSTS"):
        emit("Strict-Transport-Security", "max-age=63072000")

    # point to the onion service, unless the request already came via it
    onion_address = config.get("ONION_ADDRESS")
    if onion_address and not req.host_name.endswith(".onion"):
        query_suffix = f"?{req.query}" if req.query else ""
        emit("Onion-Location", onion_address + req.path + query_suffix)

    if config.get("debug"):
        emit("X-Debug", bool_to_str(True))
        # one header per named permission with the authorization state
        for permission in Permission:
            if not permission.name:
                continue
            is_allowed = bool(self.is_authorized(permission))
            emit(f"X-Permission-{permission.name}", bool_to_str(is_allowed))

    # the entry is selected by the microsecond of the current UTC time
    clacks_index = int(self.now_utc.microsecond) % len(CLACKS_OVERHEADS)
    emit("X-Clacks-Overhead", CLACKS_OVERHEADS[clacks_index])

    emit("Accept-CH", "Sec-CH-Prefers-Reduced-Motion")
    emit("Critical-CH", "Sec-CH-Prefers-Reduced-Motion")
    emit("Vary", "Accept,Authorization,Cookie,Sec-CH-Prefers-Reduced-Motion")


set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__
@classmethod
def supports_head(cls) -> bool:
    """
    Check whether this request handler supports HEAD requests.

    That is the case when ``cls.get`` has a keyword-only parameter
    named ``head``.
    """
    head_parameter = inspect.signature(cls.get).parameters.get("head")
    if head_parameter is None:
        # get() has no parameter called "head" at all
        return False
    return head_parameter.kind == inspect.Parameter.KEYWORD_ONLY
@cached_property
def user_settings(self) -> Options:
    """Return the ``Options`` object created for this handler."""
    # cached_property: Options(self) is constructed on first access only
    options = Options(self)
    return options
@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:  # noqa: D102
    # The docstring is assigned from _RequestHandler.write below.
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    self.set_content_type_header()

    if isinstance(chunk, dict):
        # dicts are serialized by self.dump() before anything else happens
        chunk = self.dump(chunk)

    if self.now.date() == date(self.now.year, 4, 27):
        # On April 27th some words in textual output become "Stanley".
        if isinstance(chunk, bytes):
            try:
                chunk = chunk.decode("UTF-8")
            except UnicodeDecodeError:
                pass  # not valid UTF-8: the bytes are written unchanged

        if isinstance(chunk, str):

            def stanley_or_same(match: Any) -> str:
                # the RNG is seeded with the word itself, so whether a
                # word is replaced depends only on the word and the year
                word = match[0]
                if Random(word).randrange(5) == self.now.year % 5:
                    return "Stanley"
                return word

            # matches an uppercase letter followed by at least four
            # lowercase letters, delimited by word boundaries
            chunk = regex.sub(
                r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b",
                stanley_or_same,
                chunk,
            )

    super().write(chunk)


write.__doc__ = _RequestHandler.write.__doc__
@override
def write_error(self, status_code: int, **kwargs: Any) -> None:
    """
    Write the error response in the current content type.

    ``text/html`` renders ``error.html``, the JSON and YAML types get a
    dict with ``status`` and ``reason`` and everything else gets a
    single line of plain text.

    The statement after this method replaces this docstring with
    the one of ``_RequestHandler.write_error``.
    """
    structured_types: tuple[str, str] = (
        "application/json",
        "application/yaml",
    )
    supported_types: tuple[str, ...] = (
        # text/plain as first (default), to not screw up output in terminals
        "text/plain",
        "text/html",
        "text/markdown",
        *structured_types,
        "application/vnd.asozial.dynload+json",
    )

    if self.content_type not in supported_types:
        # don't send 406, instead default with text/plain
        self.handle_accept_header(supported_types, strict=False)

    negotiated_type = self.content_type

    if negotiated_type == "text/html":
        reason = self.get_error_message(**kwargs)
        description = self.get_error_page_description(status_code)
        # tracebacks are only considered for exceptions that are not
        # HTTPErrors, and only if serving them is enabled or authorized
        show_traceback = (
            "exc_info" in kwargs
            and not issubclass(kwargs["exc_info"][0], HTTPError)
            and (
                self.settings.get("serve_traceback")
                or self.is_authorized(Permission.TRACEBACK)
            )
        )
        self.render(  # type: ignore[unused-awaitable]
            "error.html",
            status=status_code,
            reason=reason,
            description=description,
            is_traceback=show_traceback,
        )
    elif negotiated_type in structured_types:
        payload = {
            "status": status_code,
            "reason": self.get_error_message(**kwargs),
        }
        self.finish(payload)  # type: ignore[unused-awaitable]
    else:
        self.finish(  # type: ignore[unused-awaitable]
            f"{status_code} {self.get_error_message(**kwargs)}\n"
        )


write_error.__doc__ = _RequestHandler.write_error.__doc__