Coverage for an_website/utils/base_request_handler.py: 77.869%
488 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-01 14:47 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
13# pylint: disable=too-many-lines
15"""
16The base request handler used by other modules.
18This should only contain the BaseRequestHandler class.
19"""
21from __future__ import annotations
23import contextlib
24import inspect
25import logging
26import secrets
27import sys
28import traceback
29import uuid
30from asyncio import Future
31from base64 import b64decode
32from collections.abc import Awaitable, Callable, Coroutine
33from contextvars import ContextVar
34from datetime import date, datetime, timedelta, timezone, tzinfo
35from functools import cached_property, partial, reduce
36from random import Random, choice as random_choice
37from types import TracebackType
38from typing import Any, ClassVar, Final, cast, override
39from urllib.parse import SplitResult, urlsplit, urlunsplit
40from zoneinfo import ZoneInfo
42import elasticapm
43import html2text
44import orjson as json
45import regex
46import tornado.web
47import yaml
48from accept_types import get_best_match # type: ignore[import-untyped]
49from ansi2html import Ansi2HTMLConverter
50from bs4 import BeautifulSoup
51from dateutil.easter import easter
52from elastic_transport import ApiError, TransportError
53from elasticsearch import AsyncElasticsearch
54from openmoji_dist import VERSION as OPENMOJI_VERSION
55from redis.asyncio import Redis
56from tornado.httputil import HTTPServerRequest
57from tornado.iostream import StreamClosedError
58from tornado.web import (
59 Finish,
60 GZipContentEncoding,
61 HTTPError,
62 MissingArgumentError,
63 OutputTransform,
64)
66from .. import (
67 EVENT_ELASTICSEARCH,
68 EVENT_REDIS,
69 GH_ORG_URL,
70 GH_PAGES_URL,
71 GH_REPO_URL,
72 NAME,
73 ORJSON_OPTIONS,
74 pytest_is_running,
75)
76from .decorators import is_authorized
77from .options import ColourScheme, Options
78from .static_file_handling import FILE_HASHES_DICT, fix_static_path
79from .themes import THEMES
80from .utils import (
81 ModuleInfo,
82 Permission,
83 add_args_to_url,
84 ansi_replace,
85 apply,
86 backspace_replace,
87 bool_to_str,
88 emoji2html,
89 geoip,
90 hash_bytes,
91 is_prime,
92 ratelimit,
93 str_to_bool,
94)
# Module-level logger for this file.
LOGGER: Final = logging.getLogger(__name__)

# Content types that are not "text/*" but are still textual; responses
# with one of these types get a trailing newline appended by
# BaseRequestHandler._finish().
TEXT_CONTENT_TYPES: Final[set[str]] = {
    "application/javascript",
    "application/json",
    "application/vnd.asozial.dynload+json",
    "application/x-ndjson",
    "application/xml",
    "application/yaml",
}

# Holds the request currently being handled; set in
# _RequestHandler._execute() before any handler method runs.
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")
class _RequestHandler(tornado.web.RequestHandler):
    """Base for tornado request handlers."""

    @override
    async def _execute(
        self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
    ) -> None:
        # Publish the current request through the context variable
        # before the actual handler methods run.
        request_ctx_var.set(self.request)
        return await super()._execute(transforms, *args, **kwargs)

    # pylint: disable-next=protected-access
    _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__

    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        # Streamed request bodies are not consumed by this handler.
        pass

    data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__

    @override
    def log_exception(
        self,
        typ: None | type[BaseException],
        value: None | BaseException,
        tb: None | TracebackType,
    ) -> None:
        # HTTPErrors keep tornado's default logging behaviour.
        if isinstance(value, HTTPError):
            super().log_exception(typ, value, tb)
            return
        exc_info = (typ, value, tb)
        if typ is StreamClosedError:
            # A closed stream is routine; log quietly at DEBUG level.
            LOGGER.debug(
                "Stream closed %s",
                self._request_summary(),
                exc_info=exc_info,  # type: ignore[arg-type]
            )
        else:
            LOGGER.error(
                "Uncaught exception %s",
                self._request_summary(),
                exc_info=exc_info,  # type: ignore[arg-type]
            )

    log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__
class BaseRequestHandler(_RequestHandler):
    """The base request handler used by every page and API."""

    # pylint: disable=too-many-instance-attributes, too-many-public-methods

    # URL path of the Elastic APM RUM agent bundle
    # (unminified only when running in dev mode).
    ELASTIC_RUM_URL: ClassVar[str] = (
        "/@elastic/apm-rum@5.12.0/dist/bundles/elastic-apm-rum"
        f".umd{'.min' if not sys.flags.dev_mode else ''}.js"
    )

    COMPUTE_ETAG: ClassVar[bool] = True  # hash the body in compute_etag()
    ALLOW_COMPRESSION: ClassVar[bool] = True  # allow the gzip transform
    MAX_BODY_SIZE: ClassVar[None | int] = None  # request body limit (bytes)
    ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()  # Accept handling

    module_info: ModuleInfo
    # info about page, can be overridden in module_info
    title: str = "Das Asoziale Netzwerk"
    short_title: str = "Asoziales Netzwerk"
    description: str = "Die tolle Webseite des Asozialen Netzwerks"

    used_render: bool = False  # set to True by render()

    active_origin_trials: set[str]  # origin-trial features enabled so far
    content_type: None | str = None  # negotiated response content type
    apm_script: None | str
    crawler: bool = False  # True if the request comes from our crawler
    nonce: str  # per-request CSP nonce, set in set_csp_header()
186 def _finish(
187 self, chunk: None | str | bytes | dict[str, Any] = None
188 ) -> Future[None]:
189 if self._finished:
190 raise RuntimeError("finish() called twice")
192 if chunk is not None:
193 self.write(chunk)
195 if ( # pylint: disable=too-many-boolean-expressions
196 (content_type := self.content_type)
197 and (
198 content_type in TEXT_CONTENT_TYPES
199 or content_type.startswith("text/")
200 or content_type.endswith(("+xml", "+json"))
201 )
202 and self._write_buffer
203 and not self._write_buffer[-1].endswith(b"\n")
204 ):
205 self.write(b"\n")
207 return super().finish()
209 @property
210 def apm_client(self) -> None | elasticapm.Client:
211 """Get the APM client from the settings."""
212 return self.settings.get("ELASTIC_APM", {}).get("CLIENT") # type: ignore[no-any-return]
214 @property
215 def apm_enabled(self) -> bool:
216 """Return whether APM is enabled."""
217 return bool(self.settings.get("ELASTIC_APM", {}).get("ENABLED"))
219 @override
220 def compute_etag(self) -> None | str:
221 """Compute ETag with Base85 encoding."""
222 if not self.COMPUTE_ETAG:
223 return None
224 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907
    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        # Streamed request data is not used; incoming chunks are ignored.
        pass
    @override
    def decode_argument(  # noqa: D102
        self, value: bytes, name: str | None = None
    ) -> str:
        try:
            # "replace" substitutes U+FFFD for undecodable bytes
            return value.decode("UTF-8", "replace")
        except UnicodeDecodeError as exc:
            # NOTE(review): with errors="replace" the decode above should
            # never raise UnicodeDecodeError, so this branch looks
            # unreachable; kept as a defensive fallback — confirm intent.
            err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}"
            LOGGER.exception(err_msg, exc_info=exc)
            raise HTTPError(400, err_msg) from exc
    @property
    def dump(self) -> Callable[[Any], str | bytes]:
        """Get the function for dumping the output."""
        # JSON output is also valid YAML, so these content types can
        # share the orjson-based dumper below.
        yaml_subset = self.content_type in {
            "application/json",
            "application/vnd.asozial.dynload+json",
        }

        if self.content_type == "application/yaml":
            # timetuple()[2:0:-1] is (day, month); on April 1st YAML is
            # served as JSON (which is a YAML subset) as a joke
            if self.now.timetuple()[2:0:-1] == (1, 4):
                yaml_subset = True
            else:
                return lambda spam: yaml.dump(
                    spam,
                    width=self.get_int_argument("yaml_width", 80, min_=80),
                )

        if yaml_subset:
            option = ORJSON_OPTIONS
            if self.get_bool_argument("pretty", False):
                option |= json.OPT_INDENT_2  # pretty-print on request
            return lambda spam: json.dumps(spam, option=option)

        # any other content type is passed through unchanged
        return lambda spam: spam
268 @property
269 def elasticsearch(self) -> AsyncElasticsearch:
270 """
271 Get the Elasticsearch client from the settings.
273 This is None if Elasticsearch is not enabled.
274 """
275 return cast(AsyncElasticsearch, self.settings.get("ELASTICSEARCH"))
277 @property
278 def elasticsearch_prefix(self) -> str:
279 """Get the Elasticsearch prefix from the settings."""
280 return self.settings.get( # type: ignore[no-any-return]
281 "ELASTICSEARCH_PREFIX", NAME
282 )
    @override
    def finish(  # noqa: D102
        self, chunk: None | str | bytes | dict[Any, Any] = None
    ) -> Future[None]:
        # A rendered HTML page can be converted into other
        # representations depending on the negotiated content type.
        as_json = self.content_type == "application/vnd.asozial.dynload+json"
        as_plain_text = self.content_type == "text/plain"
        as_markdown = self.content_type == "text/markdown"

        if (
            not isinstance(chunk, bytes | str)
            or self.content_type == "text/html"
            or not self.used_render
            or not (as_json or as_plain_text or as_markdown)
        ):
            # nothing to convert: finish with the chunk as-is
            return self._finish(chunk)

        chunk = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk

        if as_markdown:
            # convert the rendered HTML page into Markdown
            return self._finish(
                f"# {self.title}\n\n"
                + html2text.html2text(chunk, self.request.full_url()).strip()
            )

        soup = BeautifulSoup(chunk, features="lxml")

        if as_plain_text:
            return self._finish(soup.get_text("\n", True))

        # as_json: split the page into its parts for dynamic loading
        dictionary: dict[str, object] = {
            "url": self.fix_url(),
            "title": self.title,
            "short_title": (
                self.short_title if self.title != self.short_title else None
            ),
            # the contents of the first <main> element
            "body": "".join(
                str(element)
                for element in soup.find_all(name="main")[0].contents
            ).strip(),
            "scripts": [
                {"script": script.string} | script.attrs
                for script in soup.find_all("script")
            ],
            "stylesheets": [
                stylesheet.get("href").strip()
                for stylesheet in soup.find_all("link", rel="stylesheet")
            ],
            "css": "\n".join(style.string for style in soup.find_all("style")),
        }

        return self._finish(dictionary)

    finish.__doc__ = _RequestHandler.finish.__doc__
338 def finish_dict(self, **kwargs: Any) -> Future[None]:
339 """Finish the request with a dictionary."""
340 return self.finish(kwargs)
    def fix_url(
        self,
        url: None | str | SplitResult = None,
        new_path: None | str = None,
        **query_args: None | str | bool | float,
    ) -> str:
        """
        Fix a URL and return it.

        If the URL is from another website, link to it with the redirect page,
        otherwise just return the URL with no_3rd_party appended.
        """
        if url is None:
            url = self.request.full_url()
        if isinstance(url, str):
            url = urlsplit(url)
        if url.netloc and url.netloc.lower() != self.request.host.lower():
            # external URL: route it through the redirect page if enabled
            if (
                not self.user_settings.ask_before_leaving
                or not self.settings.get("REDIRECT_MODULE_LOADED")
            ):
                return url.geturl()
            path = "/redirect"
            query_args["to"] = url.geturl()
            url = urlsplit(self.request.full_url())
        else:
            path = url.path if new_path is None else new_path
            path = f"/{path.strip('/')}".lower()
            if path == "/lolwut":
                path = path.upper()
        if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
            # static files should carry no user-settings query arguments
            query_args.update(
                dict.fromkeys(self.user_settings.iter_option_names())
            )
        else:
            for (
                key,
                value,
            ) in self.user_settings.as_dict_with_str_values().items():
                query_args.setdefault(key, value)
            # drop settings that equal the persistently saved values
            for key, value in self.user_settings.as_dict_with_str_values(
                include_query_argument=False,
                include_body_argument=self.request.path == "/einstellungen"
                and self.get_bool_argument("save_in_cookie", False),
            ).items():
                if value == query_args[key]:
                    query_args[key] = None

        return add_args_to_url(
            urlunsplit(
                (
                    self.request.protocol,
                    self.request.host,
                    "" if path == "/" else path,
                    url.query,
                    url.fragment,
                )
            ),
            **query_args,
        )
    def geoip(
        self,
        ip: None | str = None,
        # reuse the default database name of the geoip utility function
        database: str = geoip.__defaults__[0],  # type: ignore[index]
        *,
        allow_fallback: bool = True,
    ) -> Coroutine[None, None, None | dict[str, Any]]:
        """Get GeoIP information."""
        if not ip:
            ip = self.request.remote_ip
        if not EVENT_ELASTICSEARCH.is_set():
            # Elasticsearch is not available (yet); query without it
            return geoip(ip, database)
        return geoip(
            ip, database, self.elasticsearch, allow_fallback=allow_fallback
        )
419 @classmethod
420 def get_allowed_methods(cls) -> list[str]:
421 """Get allowed methods."""
422 methods = {"OPTIONS", *cls.ALLOWED_METHODS}
423 if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
424 methods.add("HEAD")
425 return sorted(methods)
427 def get_bool_argument(
428 self,
429 name: str,
430 default: None | bool = None,
431 ) -> bool:
432 """Get an argument parsed as boolean."""
433 if default is not None:
434 return str_to_bool(self.get_argument(name, ""), default)
435 value = str(self.get_argument(name))
436 try:
437 return str_to_bool(value)
438 except ValueError as err:
439 raise HTTPError(400, f"{value} is not a boolean") from err
441 def get_display_scheme(self) -> ColourScheme:
442 """Get the scheme currently displayed."""
443 scheme = self.user_settings.scheme
444 if scheme == "random":
445 return ("light", "dark")[self.now.microsecond & 1]
446 return scheme
448 def get_display_theme(self) -> str:
449 """Get the theme currently displayed."""
450 theme = self.user_settings.theme
452 if theme == "default" and self.now.month == 12:
453 return "christmas"
455 if theme != "random":
456 return theme
458 ignore_themes = ("random", "christmas")
460 return random_choice( # nosec: B311
461 tuple(theme for theme in THEMES if theme not in ignore_themes)
462 )
    def get_error_message(self, **kwargs: Any) -> str:
        """
        Get the error message and return it.

        If the serve_traceback setting is true (debug mode is activated),
        the traceback gets returned.
        """
        if "exc_info" in kwargs and not issubclass(
            kwargs["exc_info"][0], HTTPError
        ):
            if self.settings.get("serve_traceback") or self.is_authorized(
                Permission.TRACEBACK
            ):
                # full traceback for debug mode or authorized users
                return "".join(
                    traceback.format_exception(*kwargs["exc_info"])
                ).strip()
            # otherwise only the exception itself, without the traceback
            return "".join(
                traceback.format_exception_only(*kwargs["exc_info"][:2])
            ).strip()
        if "exc_info" in kwargs and issubclass(
            kwargs["exc_info"][0], MissingArgumentError
        ):
            # the log message names the missing argument
            return cast(str, kwargs["exc_info"][1].log_message)
        return str(self._reason)
489 def get_error_page_description(self, status_code: int) -> str:
490 """Get the description for the error page."""
491 # pylint: disable=too-many-return-statements
492 # https://developer.mozilla.org/docs/Web/HTTP/Status
493 if 100 <= status_code <= 199:
494 return "Hier gibt es eine total wichtige Information."
495 if 200 <= status_code <= 299:
496 return "Hier ist alles super! 🎶🎶"
497 if 300 <= status_code <= 399:
498 return "Eine Umleitung ist eingerichtet."
499 if 400 <= status_code <= 499:
500 if status_code == 404:
501 return f"{self.request.path} wurde nicht gefunden."
502 if status_code == 451:
503 return "Hier wäre bestimmt geiler Scheiß."
504 return "Ein Client-Fehler ist aufgetreten."
505 if 500 <= status_code <= 599:
506 return "Ein Server-Fehler ist aufgetreten."
507 raise ValueError(
508 f"{status_code} is not a valid HTTP response status code."
509 )
511 def get_int_argument(
512 self,
513 name: str,
514 default: None | int = None,
515 *,
516 max_: None | int = None,
517 min_: None | int = None,
518 ) -> int:
519 """Get an argument parsed as integer."""
520 if default is None:
521 str_value = self.get_argument(name)
522 try:
523 value = int(str_value, base=0)
524 except ValueError as err:
525 raise HTTPError(400, f"{str_value} is not an integer") from err
526 elif self.get_argument(name, ""):
527 try:
528 value = int(self.get_argument(name), base=0)
529 except ValueError:
530 value = default
531 else:
532 value = default
534 if max_ is not None:
535 value = min(max_, value)
536 if min_ is not None:
537 value = max(min_, value)
539 return value
541 def get_module_infos(self) -> tuple[ModuleInfo, ...]:
542 """Get the module infos."""
543 return self.settings.get("MODULE_INFOS") or ()
545 def get_reporting_api_endpoint(self) -> None | str:
546 """Get the endpoint for the Reporting API™️."""
547 if not self.settings.get("REPORTING"):
548 return None
549 endpoint = self.settings.get("REPORTING_ENDPOINT")
551 if not endpoint or not endpoint.startswith("/"):
552 return endpoint
554 return f"{self.request.protocol}://{self.request.host}{endpoint}"
    @override
    def get_template_namespace(self) -> dict[str, Any]:
        """
        Add useful things to the template namespace and return it.

        They are mostly needed by most of the pages (like title,
        description and no_3rd_party).
        """
        namespace = super().get_template_namespace()
        # converter for ANSI escape sequences to inline-styled HTML
        ansi2html = partial(
            Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
        )
        namespace.update(self.user_settings.as_dict())
        namespace.update(
            # apply ansi_replace and backspace_replace before converting
            ansi2html=partial(
                reduce, apply, (ansi2html, ansi_replace, backspace_replace)
            ),
            apm_script=(
                self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
                if self.apm_enabled
                else None
            ),
            as_html=self.content_type == "text/html",
            # "c" is true on April 1st or when the "c" cookie is set
            c=self.now.date() == date(self.now.year, 4, 1)
            or str_to_bool(self.get_cookie("c", "f") or "f", False),
            canonical_url=self.fix_url(
                self.request.full_url().upper()
                if self.request.path.upper().startswith("/LOLWUT")
                else self.request.full_url().lower()
            ).split("?")[0],
            description=self.description,
            display_theme=self.get_display_theme(),
            display_scheme=self.get_display_scheme(),
            elastic_rum_url=self.ELASTIC_RUM_URL,
            fix_static=lambda path: self.fix_url(fix_static_path(path)),
            fix_url=self.fix_url,
            # how emoji get rendered depends on the openmoji setting
            emoji2html=(
                emoji2html
                if self.user_settings.openmoji == "img"
                else (
                    (lambda emoji: f'<span class="openmoji">{emoji}</span>')
                    if self.user_settings.openmoji
                    else (lambda emoji: emoji)
                )
            ),
            form_appendix=self.user_settings.get_form_appendix(),
            GH_ORG_URL=GH_ORG_URL,
            GH_PAGES_URL=GH_PAGES_URL,
            GH_REPO_URL=GH_REPO_URL,
            keywords="Asoziales Netzwerk, Känguru-Chroniken"
            + (
                f", {self.module_info.get_keywords_as_str(self.request.path)}"
                if self.module_info  # type: ignore[truthy-bool]
                else ""
            ),
            lang="de",  # TODO: add language support
            nonce=self.nonce,
            now=self.now,
            openmoji_version=OPENMOJI_VERSION,
            settings=self.settings,
            short_title=self.short_title,
            testing=pytest_is_running(),
            title=self.title,
        )
        namespace.update(
            {
                # 🥚 is true on Easter Sunday and Easter Monday
                "🥚": timedelta()
                <= self.now.date() - easter(self.now.year)
                < timedelta(days=2),
                "🦘": is_prime(self.now.microsecond),
            }
        )
        return namespace
    async def get_time(self) -> datetime:
        """Get the start time of the request in the users' timezone."""
        tz: tzinfo = timezone.utc  # fallback when no timezone is known
        try:
            geoip = await self.geoip()  # pylint: disable=redefined-outer-name
        except (ApiError, TransportError):
            # best effort: fall back to UTC when the lookup fails
            LOGGER.exception("Elasticsearch request failed")
            if self.apm_client:
                self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
        else:
            if geoip and "timezone" in geoip:
                tz = ZoneInfo(geoip["timezone"])
        return datetime.fromtimestamp(
            self.request._start_time, tz=tz  # pylint: disable=protected-access
        )
    def get_user_id(self) -> str:
        """Get the user id saved in the cookie or create one."""
        cookie = self.get_secure_cookie(
            "user_id",
            max_age_days=90,
            min_version=2,
        )

        user_id = cookie.decode("UTF-8") if cookie else str(uuid.uuid4())

        # re-reading with a shorter max age detects cookies older than
        # 30 days, so the expiry date gets refreshed regularly
        if not self.get_secure_cookie(  # save it in cookie or reset expiry date
            "user_id", max_age_days=30, min_version=2
        ):
            self.set_secure_cookie(
                "user_id",
                user_id,
                expires_days=90,
                path="/",
                samesite="Strict",
            )

        return user_id
669 def handle_accept_header( # pylint: disable=inconsistent-return-statements
670 self, possible_content_types: tuple[str, ...], strict: bool = True
671 ) -> None:
672 """Handle the Accept header and set `self.content_type`."""
673 if not possible_content_types:
674 return
675 content_type = get_best_match(
676 self.request.headers.get("Accept") or "*/*",
677 possible_content_types,
678 )
679 if content_type is None:
680 if strict:
681 return self.handle_not_acceptable(possible_content_types)
682 content_type = possible_content_types[0]
683 self.content_type = content_type
684 self.set_content_type_header()
686 def handle_not_acceptable(
687 self, possible_content_types: tuple[str, ...]
688 ) -> None:
689 """Only call this if we cannot respect the Accept header."""
690 self.clear_header("Content-Type")
691 self.set_status(406)
692 raise Finish("\n".join(possible_content_types) + "\n")
694 def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
695 """Handle HEAD requests."""
696 if self.get.__module__ == "tornado.web":
697 raise HTTPError(405)
698 if not self.supports_head():
699 raise HTTPError(501)
701 kwargs["head"] = True
702 return self.get(*args, **kwargs)
704 @override
705 def initialize(
706 self,
707 *,
708 module_info: ModuleInfo,
709 # default is true, because then empty args dicts are
710 # enough to specify that the defaults should be used
711 default_title: bool = True,
712 default_description: bool = True,
713 ) -> None:
714 """
715 Get title and description from the kwargs.
717 If title and description are present in the kwargs,
718 then they override self.title and self.description.
719 """
720 self.module_info = module_info
721 if not default_title:
722 page_info = self.module_info.get_page_info(self.request.path)
723 self.title = page_info.name
724 self.short_title = page_info.short_name or self.title
725 if not default_description:
726 self.description = self.module_info.get_page_info(
727 self.request.path
728 ).description
730 def is_authorized(
731 self, permission: Permission, allow_cookie_auth: bool = True
732 ) -> bool | None:
733 """Check whether the request is authorized."""
734 return is_authorized(self, permission, allow_cookie_auth)
    @cached_property
    def now(self) -> datetime:
        """Get the current time."""
        # pylint: disable=method-hidden
        # prepare() normally replaces this cached property with the
        # request's start time; reaching this code means "now" was
        # accessed before prepare() set it
        if pytest_is_running():
            raise AssertionError("Now accessed before it was set")
        if self.request.method in self.SUPPORTED_METHODS:
            LOGGER.error("Now accessed before it was set", stacklevel=3)
        return datetime.fromtimestamp(
            self.request._start_time,  # pylint: disable=protected-access
            tz=timezone.utc,
        )
749 @override
750 async def options(self, *args: Any, **kwargs: Any) -> None:
751 """Handle OPTIONS requests."""
752 # pylint: disable=unused-argument
753 self.set_header("Allow", ", ".join(self.get_allowed_methods()))
754 self.set_status(204)
755 await self.finish()
    def origin_trial(self, token: bytes | str) -> bool:
        """Enable an experimental feature."""
        # pylint: disable=protected-access
        # the JSON payload starts after a 69-byte header
        # (presumably version + signature + length — TODO confirm)
        payload = json.loads(b64decode(token)[69:])
        if payload["feature"] in self.active_origin_trials:
            return True  # already enabled for this request
        origin = urlsplit(payload["origin"])
        url = urlsplit(self.request.full_url())
        if url.port is None and url.scheme in {"http", "https"}:
            # make the default port explicit for the origin comparison
            url = url._replace(
                netloc=f"{url.hostname}:{443 if url.scheme == 'https' else 80}"
            )
        if self.request._start_time > payload["expiry"]:
            return False  # the token has expired
        if url.scheme != origin.scheme:
            return False
        if url.netloc != origin.netloc and not (
            payload.get("isSubdomain")
            and url.netloc.endswith(f".{origin.netloc}")
        ):
            return False
        self.add_header("Origin-Trial", token)
        self.active_origin_trials.add(payload["feature"])
        return True
    @override
    async def prepare(self) -> None:
        """Check authorization and call self.ratelimit()."""
        # pylint: disable=invalid-overridden-method
        # replace the "now" cached property with the request's start time
        self.now = await self.get_time()

        if not self.ALLOW_COMPRESSION:
            # disable the gzip output transform for this request
            for transform in self._transforms:
                if isinstance(transform, GZipContentEncoding):
                    # pylint: disable=protected-access
                    transform._gzipping = False

        # requests with the crawler secret in the User-Agent are treated
        # as crawler requests (exempt from ratelimits, see ratelimit())
        if crawler_secret := self.settings.get("CRAWLER_SECRET"):
            self.crawler = crawler_secret in self.request.headers.get(
                "User-Agent", ""
            )

        self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)

        if (
            self.request.method in {"GET", "HEAD"}
            and self.redirect_to_canonical_domain()
        ):
            return

        # occasionally set the "c" cookie, seeded by the start time
        if self.request.method == "GET" and (
            days := Random(self.now.timestamp()).randint(0, 31337)
        ) in {
            69,
            420,
            1337,
            31337,
        }:
            self.set_cookie("c", "s", expires_days=days / 24, path="/")

        if self.request.method != "OPTIONS":
            if (
                self.MAX_BODY_SIZE is not None
                and len(self.request.body) > self.MAX_BODY_SIZE
            ):
                LOGGER.warning(
                    "%s > MAX_BODY_SIZE (%s)",
                    len(self.request.body),
                    self.MAX_BODY_SIZE,
                )
                raise HTTPError(413)

            # check the global ratelimit first, then the per-handler one
            if not await self.ratelimit(True):
                await self.ratelimit()
    async def ratelimit(self, global_ratelimit: bool = False) -> bool:
        """Take b1nzy to space using Redis."""
        # exempt: disabled ratelimits, OPTIONS requests, authorized
        # users and our own crawler
        if (
            not self.settings.get("RATELIMITS")
            or self.request.method == "OPTIONS"
            or self.is_authorized(Permission.RATELIMITS)
            or self.crawler
        ):
            return False

        if not EVENT_REDIS.is_set():
            LOGGER.warning(
                (
                    "Ratelimits are enabled, but Redis is not available. "
                    "This can happen shortly after starting the website."
                ),
            )
            raise HTTPError(503)

        if global_ratelimit:
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=None,
                max_burst=99,  # limit = 100
                count_per_period=20,  # 20 requests per second
                period=1,
                tokens=10 if self.settings.get("UNDER_ATTACK") else 1,
            )
        else:
            # HEAD shares the limits of GET
            method = (
                "GET" if self.request.method == "HEAD" else self.request.method
            )
            # per-handler limits come from RATELIMIT_<METHOD>_* attributes
            if not (limit := getattr(self, f"RATELIMIT_{method}_LIMIT", 0)):
                return False
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=getattr(
                    self,
                    f"RATELIMIT_{method}_BUCKET",
                    self.__class__.__name__.lower(),
                ),
                max_burst=limit - 1,
                count_per_period=getattr(  # request count per period
                    self,
                    f"RATELIMIT_{method}_COUNT_PER_PERIOD",
                    30,
                ),
                period=getattr(
                    self, f"RATELIMIT_{method}_PERIOD", 60  # period in seconds
                ),
                tokens=1 if self.request.method != "HEAD" else 0,
            )

        for header, value in headers.items():
            self.set_header(header, value)

        if ratelimited:
            # status 420 "Enhance Your Calm" on April 20th
            if self.now.date() == date(self.now.year, 4, 20):
                self.set_status(420)
                self.write_error(420)
            else:
                self.set_status(429)
                self.write_error(429)

        return ratelimited
    def redirect_to_canonical_domain(self) -> bool:
        """Redirect to the canonical domain."""
        if (
            not (domain := self.settings.get("DOMAIN"))
            or not self.request.headers.get("Host")
            or self.request.host_name == domain
            # hidden-service hostnames are their own canonical names
            or self.request.host_name.endswith((".onion", ".i2p"))
            # paths made of Braille patterns are not redirected
            or regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path)
        ):
            return False
        # keep an explicit port while swapping in the canonical domain
        port = urlsplit(f"//{self.request.headers['Host']}").port
        self.redirect(
            urlsplit(self.request.full_url())
            ._replace(netloc=f"{domain}:{port}" if port else domain)
            .geturl(),
            permanent=True,
        )
        return True
921 @property
922 def redis(self) -> Redis[str]:
923 """
924 Get the Redis client from the settings.
926 This is None if Redis is not enabled.
927 """
928 return cast("Redis[str]", self.settings.get("REDIS"))
930 @property
931 def redis_prefix(self) -> str:
932 """Get the Redis prefix from the settings."""
933 return self.settings.get( # type: ignore[no-any-return]
934 "REDIS_PREFIX", NAME
935 )
    @override
    def render(  # noqa: D102
        self, template_name: str, **kwargs: Any
    ) -> Future[None]:
        # remember that render was used, so finish() may convert the
        # rendered HTML into other content types
        self.used_render = True
        return super().render(template_name, **kwargs)

    render.__doc__ = _RequestHandler.render.__doc__
946 def set_content_type_header(self) -> None:
947 """Set the Content-Type header based on `self.content_type`."""
948 if str(self.content_type).startswith("text/"): # RFC 2616 (3.7.1)
949 self.set_header(
950 "Content-Type", f"{self.content_type};charset=utf-8"
951 )
952 elif self.content_type is not None:
953 self.set_header("Content-Type", self.content_type)
    @override
    def set_cookie(  # noqa: D102 # pylint: disable=too-many-arguments
        self,
        name: str,
        value: str | bytes,
        domain: None | str = None,
        expires: None | float | tuple[int, ...] | datetime = None,
        path: str = "/",
        expires_days: None | float = 400,  # changed
        *,
        secure: bool | None = None,
        httponly: bool = True,
        **kwargs: Any,
    ) -> None:
        if "samesite" not in kwargs:
            # default for same site should be strict
            kwargs["samesite"] = "Strict"

        super().set_cookie(
            name,
            value,
            domain,
            expires,
            path,
            expires_days,
            # cookies default to secure on HTTPS requests
            secure=(
                self.request.protocol == "https" if secure is None else secure
            ),
            httponly=httponly,
            **kwargs,
        )

    set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__
    def set_csp_header(self) -> None:
        """Set the Content-Security-Policy header."""
        # a fresh nonce per request allows our own inline scripts
        self.nonce = secrets.token_urlsafe(16)

        script_src = ["'self'", f"'nonce-{self.nonce}'"]

        if (
            self.apm_enabled
            and "INLINE_SCRIPT_HASH" in self.settings["ELASTIC_APM"]
        ):
            script_src.extend(
                (
                    f"'sha256-{self.settings['ELASTIC_APM']['INLINE_SCRIPT_HASH']}'",
                    "'unsafe-inline'",  # for browsers that don't support hash
                )
            )

        connect_src = ["'self'"]

        if self.apm_enabled and "SERVER_URL" in self.settings["ELASTIC_APM"]:
            rum_server_url = self.settings["ELASTIC_APM"].get("RUM_SERVER_URL")
            if rum_server_url:
                # the RUM agent needs to connect to rum_server_url
                connect_src.append(rum_server_url)
            elif rum_server_url is None:
                # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
                connect_src.append(self.settings["ELASTIC_APM"]["SERVER_URL"])

        connect_src.append(  # fix for older browsers
            ("wss" if self.request.protocol == "https" else "ws")
            + f"://{self.request.host}"
        )

        self.set_header(
            "Content-Security-Policy",
            "default-src 'self';"
            f"script-src {' '.join(script_src)};"
            f"connect-src {' '.join(connect_src)};"
            "style-src 'self' 'unsafe-inline';"
            "img-src 'self' https://img.zeit.de https://github.asozial.org;"
            "frame-ancestors 'self';"
            "sandbox allow-downloads allow-same-origin allow-modals"
            " allow-popups-to-escape-sandbox allow-scripts allow-popups"
            " allow-top-navigation-by-user-activation allow-forms;"
            "report-to default;"
            "base-uri 'none';"
            + (
                f"report-uri {self.get_reporting_api_endpoint()};"
                if self.settings.get("REPORTING")
                else ""
            ),
        )
@override
def set_default_headers(self) -> None:
    """Set the default headers sent with every response."""
    self.set_csp_header()
    self.active_origin_trials = set()

    if self.settings.get("REPORTING"):
        # wire up the Reporting API so browsers can deliver violation
        # and network-error reports to our endpoint
        endpoint = self.get_reporting_api_endpoint()
        self.set_header(
            "Reporting-Endpoints",
            f'default="{endpoint}"',  # noqa: B907
        )
        self.set_header(
            "Report-To",
            json.dumps(
                {
                    "group": "default",
                    "max_age": 2592000,
                    "endpoints": [{"url": endpoint}],
                },
                option=ORJSON_OPTIONS,
            ),
        )
        self.set_header("NEL", '{"report_to":"default","max_age":2592000}')

    # security- and CORS-related headers that always have the same value
    for header_name, header_value in (
        ("X-Content-Type-Options", "nosniff"),
        ("Access-Control-Max-Age", "7200"),
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Headers", "*"),
        (
            "Access-Control-Allow-Methods",
            ", ".join(self.get_allowed_methods()),
        ),
        ("Cross-Origin-Resource-Policy", "cross-origin"),
        (
            "Permissions-Policy",
            "browsing-topics=(),"
            "identity-credentials-get=(),"
            "join-ad-interest-group=(),"
            "private-state-token-issuance=(),"
            "private-state-token-redemption=(),"
            "run-ad-auction=()",
        ),
        ("Referrer-Policy", "same-origin"),
        ("Cross-Origin-Opener-Policy", "same-origin; report-to=default"),
    ):
        self.set_header(header_name, header_value)

    if self.request.path == "/kaenguru-comics-alt":  # TODO: improve this
        coep = "credentialless; report-to=default"
    else:
        coep = "require-corp; report-to=default"
    self.set_header("Cross-Origin-Embedder-Policy", coep)

    if self.settings.get("HSTS"):
        self.set_header("Strict-Transport-Security", "max-age=63072000")

    onion_address = self.settings.get("ONION_ADDRESS")
    if onion_address and not self.request.host_name.endswith(".onion"):
        # point Tor users at the onion service serving the same path
        location = onion_address + self.request.path
        if self.request.query:
            location += f"?{self.request.query}"
        self.set_header("Onion-Location", location)

    if self.settings.get("debug"):
        self.set_header("X-Debug", bool_to_str(True))
        for permission in Permission:
            if permission.name:
                self.set_header(
                    f"X-Permission-{permission.name}",
                    bool_to_str(bool(self.is_authorized(permission))),
                )

    self.set_header("Vary", "Accept, Authorization, Cookie")
# Reuse the base handler's docstring for the override so documentation
# tools show the upstream description (_RequestHandler is presumably
# tornado's RequestHandler — confirm at its definition earlier in the file).
set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__
@classmethod
def supports_head(cls) -> bool:
    """Check whether this request handler supports HEAD requests.

    HEAD support is signalled by the ``get`` method taking a
    keyword-only parameter named ``head``.
    """
    head_param = inspect.signature(cls.get).parameters.get("head")
    return (
        head_param is not None
        and head_param.kind == inspect.Parameter.KEYWORD_ONLY
    )
@cached_property
def user_settings(self) -> Options:
    """Return the (cached) options holding this user's settings."""
    options = Options(self)
    return options
@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:  # noqa: D102
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    self.set_content_type_header()

    # dicts get serialized according to the negotiated content type
    if isinstance(chunk, dict):
        chunk = self.dump(chunk)

    # Easter egg for the 27th of April: occasionally replace capitalized
    # words of at least five letters with "Stanley".
    if self.now.date() == date(self.now.year, 4, 27):
        if isinstance(chunk, bytes):
            with contextlib.suppress(UnicodeDecodeError):
                chunk = chunk.decode("UTF-8")
        if isinstance(chunk, str):

            def stanleyify(match: regex.Match[str]) -> str:
                word = match[0]
                # seeding the RNG with the word itself keeps the
                # substitution deterministic for a given word and year
                if Random(word).randrange(5) == self.now.year % 5:
                    return "Stanley"
                return word

            chunk = regex.sub(
                r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b", stanleyify, chunk
            )

    super().write(chunk)
# Reuse the base handler's docstring for the override (write itself has
# none — see the "noqa: D102" on its definition).
write.__doc__ = _RequestHandler.write.__doc__
@override
def write_error(self, status_code: int, **kwargs: Any) -> None:
    """Render the error page."""
    dict_content_types: tuple[str, str] = (
        "application/json",
        "application/yaml",
    )
    all_error_content_types: tuple[str, ...] = (
        # text/plain as first (default), to not screw up output in terminals
        "text/plain",
        "text/html",
        "text/markdown",
        *dict_content_types,
        "application/vnd.asozial.dynload+json",
    )

    if self.content_type not in all_error_content_types:
        # don't send 406, instead default with text/plain
        self.handle_accept_header(all_error_content_types, strict=False)

    reason = self.get_error_message(**kwargs)

    if self.content_type == "text/html":
        # tracebacks are only rendered for real exceptions (not HTTPError)
        # and only when allowed by settings or explicit authorization
        show_traceback = (
            "exc_info" in kwargs
            and not issubclass(kwargs["exc_info"][0], HTTPError)
            and (
                self.settings.get("serve_traceback")
                or self.is_authorized(Permission.TRACEBACK)
            )
        )
        self.render(  # type: ignore[unused-awaitable]
            "error.html",
            status=status_code,
            reason=reason,
            description=self.get_error_page_description(status_code),
            is_traceback=show_traceback,
        )
        return

    if self.content_type in dict_content_types:
        self.finish(  # type: ignore[unused-awaitable]
            {"status": status_code, "reason": reason}
        )
        return

    self.finish(  # type: ignore[unused-awaitable]
        f"{status_code} {reason}\n"
    )
# Reuse the base handler's docstring for the override so documentation
# tools show the upstream description of write_error.
write_error.__doc__ = _RequestHandler.write_error.__doc__