Coverage for an_website/utils/base_request_handler.py: 79.508%
488 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-16 19:56 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
13# pylint: disable=too-many-lines
15"""
16The base request handler used by other modules.
18This should only contain the BaseRequestHandler class.
19"""
21from __future__ import annotations
23import contextlib
24import inspect
25import logging
26import secrets
27import sys
28import traceback
29import uuid
30from asyncio import Future
31from base64 import b64decode
32from collections.abc import Awaitable, Callable, Coroutine
33from contextvars import ContextVar
34from datetime import date, datetime, timedelta, timezone, tzinfo
35from functools import cached_property, partial, reduce
36from random import Random, choice as random_choice
37from types import TracebackType
38from typing import Any, ClassVar, Final, cast, override
39from urllib.parse import SplitResult, urlsplit, urlunsplit
40from zoneinfo import ZoneInfo
42import elasticapm
43import html2text
44import orjson as json
45import regex
46import tornado.web
47import yaml
48from accept_types import get_best_match # type: ignore[import-untyped]
49from ansi2html import Ansi2HTMLConverter
50from bs4 import BeautifulSoup
51from dateutil.easter import easter
52from elastic_transport import ApiError, TransportError
53from elasticsearch import AsyncElasticsearch
54from openmoji_dist import VERSION as OPENMOJI_VERSION
55from redis.asyncio import Redis
56from tornado.httputil import HTTPServerRequest
57from tornado.iostream import StreamClosedError
58from tornado.web import (
59 Finish,
60 GZipContentEncoding,
61 HTTPError,
62 MissingArgumentError,
63 OutputTransform,
64)
66from .. import (
67 EVENT_ELASTICSEARCH,
68 EVENT_REDIS,
69 GH_ORG_URL,
70 GH_PAGES_URL,
71 GH_REPO_URL,
72 NAME,
73 ORJSON_OPTIONS,
74 pytest_is_running,
75)
76from .decorators import is_authorized
77from .options import Options
78from .static_file_handling import FILE_HASHES_DICT, fix_static_path
79from .themes import THEMES
80from .utils import (
81 ModuleInfo,
82 Permission,
83 add_args_to_url,
84 ansi_replace,
85 apply,
86 backspace_replace,
87 bool_to_str,
88 emoji2html,
89 geoip,
90 hash_bytes,
91 is_prime,
92 ratelimit,
93 str_to_bool,
94)
LOGGER: Final = logging.getLogger(__name__)

# Content types that are textual even though they do not start with "text/".
# Used (e.g. by _finish) to decide whether output should end with a newline.
TEXT_CONTENT_TYPES: Final[set[str]] = {
    "application/javascript",
    "application/json",
    "application/vnd.asozial.dynload+json",
    "application/x-ndjson",
    "application/xml",
    "application/yaml",
}

# Context variable that always holds the request currently being handled.
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")
class _RequestHandler(tornado.web.RequestHandler):
    """Base for tornado request handlers."""

    @override
    async def _execute(
        self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
    ) -> None:
        # Publish the current request through the context variable before
        # delegating to Tornado's normal execution machinery.
        request_ctx_var.set(self.request)
        return await super()._execute(transforms, *args, **kwargs)

    # pylint: disable-next=protected-access
    _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__

    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        # Streamed request bodies are ignored by default.
        pass

    data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__

    @override
    def log_exception(
        self,
        typ: None | type[BaseException],
        value: None | BaseException,
        tb: None | TracebackType,
    ) -> None:
        if isinstance(value, HTTPError):
            # Expected errors get Tornado's default treatment.
            super().log_exception(typ, value, tb)
        elif typ is StreamClosedError:
            # Clients closing the connection early is routine; log quietly.
            LOGGER.debug(
                "Stream closed %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )
        else:
            LOGGER.error(
                "Uncaught exception %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )

    log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__
class BaseRequestHandler(_RequestHandler):
    """The base request handler used by every page and API."""

    # pylint: disable=too-many-instance-attributes, too-many-public-methods

    # URL of the Elastic APM RUM agent (minified outside dev mode)
    ELASTIC_RUM_URL: ClassVar[str] = (
        "/@elastic/apm-rum@5.12.0/dist/bundles/elastic-apm-rum"
        f".umd{'.min' if not sys.flags.dev_mode else ''}.js"
    )

    COMPUTE_ETAG: ClassVar[bool] = True
    ALLOW_COMPRESSION: ClassVar[bool] = True
    MAX_BODY_SIZE: ClassVar[None | int] = None
    ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()

    module_info: ModuleInfo
    # info about page, can be overridden in module_info
    title: str = "Das Asoziale Netzwerk"
    short_title: str = "Asoziales Netzwerk"
    description: str = "Die tolle Webseite des Asozialen Netzwerks"

    # whether render() has been called for this request
    used_render: bool = False

    active_origin_trials: set[str]
    content_type: None | str = None
    apm_script: None | str
    crawler: bool = False
    nonce: str
186 def _finish(
187 self, chunk: None | str | bytes | dict[str, Any] = None
188 ) -> Future[None]:
189 if self._finished:
190 raise RuntimeError("finish() called twice")
192 if chunk is not None:
193 self.write(chunk)
195 if ( # pylint: disable=too-many-boolean-expressions
196 (content_type := self.content_type)
197 and (
198 content_type in TEXT_CONTENT_TYPES
199 or content_type.startswith("text/")
200 or content_type.endswith(("+xml", "+json"))
201 )
202 and self._write_buffer
203 and not self._write_buffer[-1].endswith(b"\n")
204 ):
205 self.write(b"\n")
207 return super().finish()
209 @property
210 def apm_client(self) -> None | elasticapm.Client:
211 """Get the APM client from the settings."""
212 return self.settings.get("ELASTIC_APM", {}).get("CLIENT") # type: ignore[no-any-return]
214 @property
215 def apm_enabled(self) -> bool:
216 """Return whether APM is enabled."""
217 return bool(self.settings.get("ELASTIC_APM", {}).get("ENABLED"))
219 @override
220 def compute_etag(self) -> None | str:
221 """Compute ETag with Base85 encoding."""
222 if not self.COMPUTE_ETAG:
223 return None
224 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907
226 @override
227 def data_received( # noqa: D102
228 self, chunk: bytes
229 ) -> None | Awaitable[None]:
230 pass
232 @override
233 def decode_argument( # noqa: D102
234 self, value: bytes, name: str | None = None
235 ) -> str:
236 try:
237 return value.decode("UTF-8", "replace")
238 except UnicodeDecodeError as exc:
239 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}"
240 LOGGER.exception(err_msg, exc_info=exc)
241 raise HTTPError(400, err_msg) from exc
243 @property
244 def dump(self) -> Callable[[Any], str | bytes]:
245 """Get the function for dumping the output."""
246 yaml_subset = self.content_type in {
247 "application/json",
248 "application/vnd.asozial.dynload+json",
249 }
251 if self.content_type == "application/yaml":
252 if self.now.timetuple()[2:0:-1] == (1, 4):
253 yaml_subset = True
254 else:
255 return lambda spam: yaml.dump(
256 spam,
257 width=self.get_int_argument("yaml_width", 80, min_=80),
258 )
260 if yaml_subset:
261 option = ORJSON_OPTIONS
262 if self.get_bool_argument("pretty", False):
263 option |= json.OPT_INDENT_2
264 return lambda spam: json.dumps(spam, option=option)
266 return lambda spam: spam
268 @property
269 def elasticsearch(self) -> AsyncElasticsearch:
270 """
271 Get the Elasticsearch client from the settings.
273 This is None if Elasticsearch is not enabled.
274 """
275 return cast(AsyncElasticsearch, self.settings.get("ELASTICSEARCH"))
277 @property
278 def elasticsearch_prefix(self) -> str:
279 """Get the Elasticsearch prefix from the settings."""
280 return self.settings.get( # type: ignore[no-any-return]
281 "ELASTICSEARCH_PREFIX", NAME
282 )
284 @override
285 def finish( # noqa: D102
286 self, chunk: None | str | bytes | dict[Any, Any] = None
287 ) -> Future[None]:
288 as_json = self.content_type == "application/vnd.asozial.dynload+json"
289 as_plain_text = self.content_type == "text/plain"
290 as_markdown = self.content_type == "text/markdown"
292 if (
293 not isinstance(chunk, bytes | str)
294 or self.content_type == "text/html"
295 or not self.used_render
296 or not (as_json or as_plain_text or as_markdown)
297 ):
298 return self._finish(chunk)
300 chunk = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk
302 if as_markdown:
303 return self._finish(
304 f"# {self.title}\n\n"
305 + html2text.html2text(chunk, self.request.full_url()).strip()
306 )
308 soup = BeautifulSoup(chunk, features="lxml")
310 if as_plain_text:
311 return self._finish(soup.get_text("\n", True))
313 dictionary: dict[str, Any] = {
314 "url": self.fix_url(),
315 "title": self.title,
316 "short_title": (
317 self.short_title if self.title != self.short_title else None
318 ),
319 "body": "".join(
320 str(element)
321 for element in soup.find_all(name="main")[0].contents
322 ).strip(),
323 "scripts": (
324 [
325 {"script": script.string} | script.attrs
326 for script in soup.find_all("script")
327 ]
328 if soup.head
329 else []
330 ),
331 "stylesheets": (
332 [
333 stylesheet.get("href").strip()
334 for stylesheet in soup.find_all("link", rel="stylesheet")
335 ]
336 if soup.head
337 else []
338 ),
339 "css": (
340 "\n".join(style.string for style in soup.find_all("style"))
341 if soup.head
342 else ""
343 ),
344 }
346 return self._finish(dictionary)
348 finish.__doc__ = _RequestHandler.finish.__doc__
350 def finish_dict(self, **kwargs: Any) -> Future[None]:
351 """Finish the request with a dictionary."""
352 return self.finish(kwargs)
354 def fix_url(
355 self,
356 url: None | str | SplitResult = None,
357 new_path: None | str = None,
358 **query_args: None | str | bool | float,
359 ) -> str:
360 """
361 Fix a URL and return it.
363 If the URL is from another website, link to it with the redirect page,
364 otherwise just return the URL with no_3rd_party appended.
365 """
366 if url is None:
367 url = self.request.full_url()
368 if isinstance(url, str):
369 url = urlsplit(url)
370 if url.netloc and url.netloc.lower() != self.request.host.lower():
371 if (
372 not self.user_settings.ask_before_leaving
373 or not self.settings.get("REDIRECT_MODULE_LOADED")
374 ):
375 return url.geturl()
376 path = "/redirect"
377 query_args["to"] = url.geturl()
378 url = urlsplit(self.request.full_url())
379 else:
380 path = url.path if new_path is None else new_path
381 path = f"/{path.strip('/')}".lower()
382 if path == "/lolwut":
383 path = path.upper()
384 if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
385 query_args.update(
386 dict.fromkeys(self.user_settings.iter_option_names())
387 )
388 else:
389 for (
390 key,
391 value,
392 ) in self.user_settings.as_dict_with_str_values().items():
393 query_args.setdefault(key, value)
394 for key, value in self.user_settings.as_dict_with_str_values(
395 include_query_argument=False,
396 include_body_argument=self.request.path == "/einstellungen"
397 and self.get_bool_argument("save_in_cookie", False),
398 ).items():
399 if value == query_args[key]:
400 query_args[key] = None
402 return add_args_to_url(
403 urlunsplit(
404 (
405 self.request.protocol,
406 self.request.host,
407 "" if path == "/" else path,
408 url.query,
409 url.fragment,
410 )
411 ),
412 **query_args,
413 )
415 def geoip(
416 self,
417 ip: None | str = None,
418 database: str = geoip.__defaults__[0], # type: ignore[index]
419 *,
420 allow_fallback: bool = True,
421 ) -> Coroutine[None, None, None | dict[str, Any]]:
422 """Get GeoIP information."""
423 if not ip:
424 ip = self.request.remote_ip
425 if not EVENT_ELASTICSEARCH.is_set():
426 return geoip(ip, database)
427 return geoip(
428 ip, database, self.elasticsearch, allow_fallback=allow_fallback
429 )
431 @classmethod
432 def get_allowed_methods(cls) -> list[str]:
433 """Get allowed methods."""
434 methods = {"OPTIONS", *cls.ALLOWED_METHODS}
435 if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
436 methods.add("HEAD")
437 return sorted(methods)
439 def get_bool_argument(
440 self,
441 name: str,
442 default: None | bool = None,
443 ) -> bool:
444 """Get an argument parsed as boolean."""
445 if default is not None:
446 return str_to_bool(self.get_argument(name, ""), default)
447 value = str(self.get_argument(name))
448 try:
449 return str_to_bool(value)
450 except ValueError as err:
451 raise HTTPError(400, f"{value} is not a boolean") from err
453 def get_display_theme(self) -> str:
454 """Get the theme currently displayed."""
455 theme = self.user_settings.theme
457 if theme == "default" and self.now.month == 12:
458 return "christmas"
460 if theme.split("_")[0] != "random":
461 return theme
463 ignore_themes = ["random", "random_dark"]
465 if self.now.month != 12:
466 ignore_themes.append("christmas")
468 if theme == "random_dark":
469 ignore_themes.extend(("light", "light_blue", "fun"))
471 return random_choice( # nosec: B311
472 tuple(theme for theme in THEMES if theme not in ignore_themes)
473 )
475 def get_error_message(self, **kwargs: Any) -> str:
476 """
477 Get the error message and return it.
479 If the serve_traceback setting is true (debug mode is activated),
480 the traceback gets returned.
481 """
482 if "exc_info" in kwargs and not issubclass(
483 kwargs["exc_info"][0], HTTPError
484 ):
485 if self.settings.get("serve_traceback") or self.is_authorized(
486 Permission.TRACEBACK
487 ):
488 return "".join(
489 traceback.format_exception(*kwargs["exc_info"])
490 ).strip()
491 return "".join(
492 traceback.format_exception_only(*kwargs["exc_info"][:2])
493 ).strip()
494 if "exc_info" in kwargs and issubclass(
495 kwargs["exc_info"][0], MissingArgumentError
496 ):
497 return cast(str, kwargs["exc_info"][1].log_message)
498 return str(self._reason)
500 def get_error_page_description(self, status_code: int) -> str:
501 """Get the description for the error page."""
502 # pylint: disable=too-many-return-statements
503 # https://developer.mozilla.org/docs/Web/HTTP/Status
504 if 100 <= status_code <= 199:
505 return "Hier gibt es eine total wichtige Information."
506 if 200 <= status_code <= 299:
507 return "Hier ist alles super! 🎶🎶"
508 if 300 <= status_code <= 399:
509 return "Eine Umleitung ist eingerichtet."
510 if 400 <= status_code <= 499:
511 if status_code == 404:
512 return f"{self.request.path} wurde nicht gefunden."
513 if status_code == 451:
514 return "Hier wäre bestimmt geiler Scheiß."
515 return "Ein Client-Fehler ist aufgetreten."
516 if 500 <= status_code <= 599:
517 return "Ein Server-Fehler ist aufgetreten."
518 raise ValueError(
519 f"{status_code} is not a valid HTTP response status code."
520 )
522 def get_int_argument(
523 self,
524 name: str,
525 default: None | int = None,
526 *,
527 max_: None | int = None,
528 min_: None | int = None,
529 ) -> int:
530 """Get an argument parsed as integer."""
531 if default is None:
532 str_value = self.get_argument(name)
533 try:
534 value = int(str_value, base=0)
535 except ValueError as err:
536 raise HTTPError(400, f"{str_value} is not an integer") from err
537 elif self.get_argument(name, ""):
538 try:
539 value = int(self.get_argument(name), base=0)
540 except ValueError:
541 value = default
542 else:
543 value = default
545 if max_ is not None:
546 value = min(max_, value)
547 if min_ is not None:
548 value = max(min_, value)
550 return value
552 def get_module_infos(self) -> tuple[ModuleInfo, ...]:
553 """Get the module infos."""
554 return self.settings.get("MODULE_INFOS") or ()
556 def get_reporting_api_endpoint(self) -> None | str:
557 """Get the endpoint for the Reporting API™️."""
558 if not self.settings.get("REPORTING"):
559 return None
560 endpoint = self.settings.get("REPORTING_ENDPOINT")
562 if not endpoint or not endpoint.startswith("/"):
563 return endpoint
565 return f"{self.request.protocol}://{self.request.host}{endpoint}"
567 @override
568 def get_template_namespace(self) -> dict[str, Any]:
569 """
570 Add useful things to the template namespace and return it.
572 They are mostly needed by most of the pages (like title,
573 description and no_3rd_party).
574 """
575 namespace = super().get_template_namespace()
576 ansi2html = partial(
577 Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
578 )
579 namespace.update(self.user_settings.as_dict())
580 namespace.update(
581 ansi2html=partial(
582 reduce, apply, (ansi2html, ansi_replace, backspace_replace)
583 ),
584 apm_script=(
585 self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
586 if self.apm_enabled
587 else None
588 ),
589 as_html=self.content_type == "text/html",
590 c=self.now.date() == date(self.now.year, 4, 1)
591 or str_to_bool(self.get_cookie("c", "f") or "f", False),
592 canonical_url=self.fix_url(
593 self.request.full_url().upper()
594 if self.request.path.upper().startswith("/LOLWUT")
595 else self.request.full_url().lower()
596 ).split("?")[0],
597 description=self.description,
598 display_theme=self.get_display_theme(),
599 elastic_rum_url=self.ELASTIC_RUM_URL,
600 fix_static=lambda path: self.fix_url(fix_static_path(path)),
601 fix_url=self.fix_url,
602 emoji2html=(
603 emoji2html
604 if self.user_settings.openmoji == "img"
605 else (
606 (lambda emoji: f'<span class="openmoji">{emoji}</span>')
607 if self.user_settings.openmoji
608 else (lambda emoji: emoji)
609 )
610 ),
611 form_appendix=self.user_settings.get_form_appendix(),
612 GH_ORG_URL=GH_ORG_URL,
613 GH_PAGES_URL=GH_PAGES_URL,
614 GH_REPO_URL=GH_REPO_URL,
615 keywords="Asoziales Netzwerk, Känguru-Chroniken"
616 + (
617 f", {self.module_info.get_keywords_as_str(self.request.path)}"
618 if self.module_info # type: ignore[truthy-bool]
619 else ""
620 ),
621 lang="de", # TODO: add language support
622 nonce=self.nonce,
623 now=self.now,
624 openmoji_version=OPENMOJI_VERSION,
625 settings=self.settings,
626 short_title=self.short_title,
627 testing=pytest_is_running(),
628 title=self.title,
629 )
630 namespace.update(
631 {
632 "🥚": timedelta()
633 <= self.now.date() - easter(self.now.year)
634 < timedelta(days=2),
635 "🦘": is_prime(self.now.microsecond),
636 }
637 )
638 return namespace
640 async def get_time(self) -> datetime:
641 """Get the start time of the request in the users' timezone."""
642 tz: tzinfo = timezone.utc
643 try:
644 geoip = await self.geoip() # pylint: disable=redefined-outer-name
645 except (ApiError, TransportError):
646 LOGGER.exception("Elasticsearch request failed")
647 if self.apm_client:
648 self.apm_client.capture_exception() # type: ignore[no-untyped-call]
649 else:
650 if geoip and "timezone" in geoip:
651 tz = ZoneInfo(geoip["timezone"])
652 return datetime.fromtimestamp(
653 self.request._start_time, tz=tz # pylint: disable=protected-access
654 )
656 def get_user_id(self) -> str:
657 """Get the user id saved in the cookie or create one."""
658 cookie = self.get_secure_cookie(
659 "user_id",
660 max_age_days=90,
661 min_version=2,
662 )
664 user_id = cookie.decode("UTF-8") if cookie else str(uuid.uuid4())
666 if not self.get_secure_cookie( # save it in cookie or reset expiry date
667 "user_id", max_age_days=30, min_version=2
668 ):
669 self.set_secure_cookie(
670 "user_id",
671 user_id,
672 expires_days=90,
673 path="/",
674 samesite="Strict",
675 )
677 return user_id
679 def handle_accept_header( # pylint: disable=inconsistent-return-statements
680 self, possible_content_types: tuple[str, ...], strict: bool = True
681 ) -> None:
682 """Handle the Accept header and set `self.content_type`."""
683 if not possible_content_types:
684 return
685 content_type = get_best_match(
686 self.request.headers.get("Accept") or "*/*",
687 possible_content_types,
688 )
689 if content_type is None:
690 if strict:
691 return self.handle_not_acceptable(possible_content_types)
692 content_type = possible_content_types[0]
693 self.content_type = content_type
694 self.set_content_type_header()
696 def handle_not_acceptable(
697 self, possible_content_types: tuple[str, ...]
698 ) -> None:
699 """Only call this if we cannot respect the Accept header."""
700 self.clear_header("Content-Type")
701 self.set_status(406)
702 raise Finish("\n".join(possible_content_types) + "\n")
704 def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
705 """Handle HEAD requests."""
706 if self.get.__module__ == "tornado.web":
707 raise HTTPError(405)
708 if not self.supports_head():
709 raise HTTPError(501)
711 kwargs["head"] = True
712 return self.get(*args, **kwargs)
714 @override
715 def initialize(
716 self,
717 *,
718 module_info: ModuleInfo,
719 # default is true, because then empty args dicts are
720 # enough to specify that the defaults should be used
721 default_title: bool = True,
722 default_description: bool = True,
723 ) -> None:
724 """
725 Get title and description from the kwargs.
727 If title and description are present in the kwargs,
728 then they override self.title and self.description.
729 """
730 self.module_info = module_info
731 if not default_title:
732 page_info = self.module_info.get_page_info(self.request.path)
733 self.title = page_info.name
734 self.short_title = page_info.short_name or self.title
735 if not default_description:
736 self.description = self.module_info.get_page_info(
737 self.request.path
738 ).description
740 def is_authorized(
741 self, permission: Permission, allow_cookie_auth: bool = True
742 ) -> bool | None:
743 """Check whether the request is authorized."""
744 return is_authorized(self, permission, allow_cookie_auth)
746 @cached_property
747 def now(self) -> datetime:
748 """Get the current time."""
749 # pylint: disable=method-hidden
750 if pytest_is_running():
751 raise AssertionError("Now accessed before it was set")
752 if self.request.method in self.SUPPORTED_METHODS:
753 LOGGER.error("Now accessed before it was set", stacklevel=3)
754 return datetime.fromtimestamp(
755 self.request._start_time, # pylint: disable=protected-access
756 tz=timezone.utc,
757 )
759 @override
760 async def options(self, *args: Any, **kwargs: Any) -> None:
761 """Handle OPTIONS requests."""
762 # pylint: disable=unused-argument
763 self.set_header("Allow", ", ".join(self.get_allowed_methods()))
764 self.set_status(204)
765 await self.finish()
767 def origin_trial(self, token: bytes | str) -> bool:
768 """Enable an experimental feature."""
769 # pylint: disable=protected-access
770 payload = json.loads(b64decode(token)[69:])
771 if payload["feature"] in self.active_origin_trials:
772 return True
773 origin = urlsplit(payload["origin"])
774 url = urlsplit(self.request.full_url())
775 if url.port is None and url.scheme in {"http", "https"}:
776 url = url._replace(
777 netloc=f"{url.hostname}:{443 if url.scheme == 'https' else 80}"
778 )
779 if self.request._start_time > payload["expiry"]:
780 return False
781 if url.scheme != origin.scheme:
782 return False
783 if url.netloc != origin.netloc and not (
784 payload.get("isSubdomain")
785 and url.netloc.endswith(f".{origin.netloc}")
786 ):
787 return False
788 self.add_header("Origin-Trial", token)
789 self.active_origin_trials.add(payload["feature"])
790 return True
792 @override
793 async def prepare(self) -> None:
794 """Check authorization and call self.ratelimit()."""
795 # pylint: disable=invalid-overridden-method
796 self.now = await self.get_time()
798 if not self.ALLOW_COMPRESSION:
799 for transform in self._transforms:
800 if isinstance(transform, GZipContentEncoding):
801 # pylint: disable=protected-access
802 transform._gzipping = False
804 if crawler_secret := self.settings.get("CRAWLER_SECRET"):
805 self.crawler = crawler_secret in self.request.headers.get(
806 "User-Agent", ""
807 )
809 self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)
811 if (
812 self.request.method in {"GET", "HEAD"}
813 and self.redirect_to_canonical_domain()
814 ):
815 return
817 if self.request.method == "GET" and (
818 days := Random(self.now.timestamp()).randint(0, 31337)
819 ) in {
820 69,
821 420,
822 1337,
823 31337,
824 }:
825 self.set_cookie("c", "s", expires_days=days / 24, path="/")
827 if self.request.method != "OPTIONS":
828 if (
829 self.MAX_BODY_SIZE is not None
830 and len(self.request.body) > self.MAX_BODY_SIZE
831 ):
832 LOGGER.warning(
833 "%s > MAX_BODY_SIZE (%s)",
834 len(self.request.body),
835 self.MAX_BODY_SIZE,
836 )
837 raise HTTPError(413)
839 if not await self.ratelimit(True):
840 await self.ratelimit()
842 async def ratelimit(self, global_ratelimit: bool = False) -> bool:
843 """Take b1nzy to space using Redis."""
844 if (
845 not self.settings.get("RATELIMITS")
846 or self.request.method == "OPTIONS"
847 or self.is_authorized(Permission.RATELIMITS)
848 or self.crawler
849 ):
850 return False
852 if not EVENT_REDIS.is_set():
853 LOGGER.warning(
854 (
855 "Ratelimits are enabled, but Redis is not available. "
856 "This can happen shortly after starting the website."
857 ),
858 )
859 raise HTTPError(503)
861 if global_ratelimit:
862 ratelimited, headers = await ratelimit(
863 self.redis,
864 self.redis_prefix,
865 str(self.request.remote_ip),
866 bucket=None,
867 max_burst=99, # limit = 100
868 count_per_period=20, # 20 requests per second
869 period=1,
870 tokens=10 if self.settings.get("UNDER_ATTACK") else 1,
871 )
872 else:
873 method = (
874 "GET" if self.request.method == "HEAD" else self.request.method
875 )
876 if not (limit := getattr(self, f"RATELIMIT_{method}_LIMIT", 0)):
877 return False
878 ratelimited, headers = await ratelimit(
879 self.redis,
880 self.redis_prefix,
881 str(self.request.remote_ip),
882 bucket=getattr(
883 self,
884 f"RATELIMIT_{method}_BUCKET",
885 self.__class__.__name__.lower(),
886 ),
887 max_burst=limit - 1,
888 count_per_period=getattr( # request count per period
889 self,
890 f"RATELIMIT_{method}_COUNT_PER_PERIOD",
891 30,
892 ),
893 period=getattr(
894 self, f"RATELIMIT_{method}_PERIOD", 60 # period in seconds
895 ),
896 tokens=1 if self.request.method != "HEAD" else 0,
897 )
899 for header, value in headers.items():
900 self.set_header(header, value)
902 if ratelimited:
903 if self.now.date() == date(self.now.year, 4, 20):
904 self.set_status(420)
905 self.write_error(420)
906 else:
907 self.set_status(429)
908 self.write_error(429)
910 return ratelimited
912 def redirect_to_canonical_domain(self) -> bool:
913 """Redirect to the canonical domain."""
914 if (
915 not (domain := self.settings.get("DOMAIN"))
916 or not self.request.headers.get("Host")
917 or self.request.host_name == domain
918 or self.request.host_name.endswith((".onion", ".i2p"))
919 or regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path)
920 ):
921 return False
922 port = urlsplit(f"//{self.request.headers['Host']}").port
923 self.redirect(
924 urlsplit(self.request.full_url())
925 ._replace(netloc=f"{domain}:{port}" if port else domain)
926 .geturl(),
927 permanent=True,
928 )
929 return True
931 @property
932 def redis(self) -> Redis[str]:
933 """
934 Get the Redis client from the settings.
936 This is None if Redis is not enabled.
937 """
938 return cast("Redis[str]", self.settings.get("REDIS"))
940 @property
941 def redis_prefix(self) -> str:
942 """Get the Redis prefix from the settings."""
943 return self.settings.get( # type: ignore[no-any-return]
944 "REDIS_PREFIX", NAME
945 )
947 @override
948 def render( # noqa: D102
949 self, template_name: str, **kwargs: Any
950 ) -> Future[None]:
951 self.used_render = True
952 return super().render(template_name, **kwargs)
954 render.__doc__ = _RequestHandler.render.__doc__
956 def set_content_type_header(self) -> None:
957 """Set the Content-Type header based on `self.content_type`."""
958 if str(self.content_type).startswith("text/"): # RFC 2616 (3.7.1)
959 self.set_header(
960 "Content-Type", f"{self.content_type};charset=utf-8"
961 )
962 elif self.content_type is not None:
963 self.set_header("Content-Type", self.content_type)
965 @override
966 def set_cookie( # noqa: D102 # pylint: disable=too-many-arguments
967 self,
968 name: str,
969 value: str | bytes,
970 domain: None | str = None,
971 expires: None | float | tuple[int, ...] | datetime = None,
972 path: str = "/",
973 expires_days: None | float = 400, # changed
974 **kwargs: Any,
975 ) -> None:
976 if "samesite" not in kwargs:
977 # default for same site should be strict
978 kwargs["samesite"] = "Strict"
980 super().set_cookie(
981 name,
982 value,
983 domain,
984 expires,
985 path,
986 expires_days,
987 **kwargs,
988 )
990 set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__
992 def set_csp_header(self) -> None:
993 """Set the Content-Security-Policy header."""
994 self.nonce = secrets.token_urlsafe(16)
996 script_src = ["'self'", f"'nonce-{self.nonce}'"]
998 if (
999 self.apm_enabled
1000 and "INLINE_SCRIPT_HASH" in self.settings["ELASTIC_APM"]
1001 ):
1002 script_src.extend(
1003 (
1004 f"'sha256-{self.settings['ELASTIC_APM']['INLINE_SCRIPT_HASH']}'",
1005 "'unsafe-inline'", # for browsers that don't support hash
1006 )
1007 )
1009 connect_src = ["'self'"]
1011 if self.apm_enabled and "SERVER_URL" in self.settings["ELASTIC_APM"]:
1012 rum_server_url = self.settings["ELASTIC_APM"].get("RUM_SERVER_URL")
1013 if rum_server_url:
1014 # the RUM agent needs to connect to rum_server_url
1015 connect_src.append(rum_server_url)
1016 elif rum_server_url is None:
1017 # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
1018 connect_src.append(self.settings["ELASTIC_APM"]["SERVER_URL"])
1020 connect_src.append( # fix for older browsers
1021 ("wss" if self.request.protocol == "https" else "ws")
1022 + f"://{self.request.host}"
1023 )
1025 self.set_header(
1026 "Content-Security-Policy",
1027 "default-src 'self';"
1028 f"script-src {' '.join(script_src)};"
1029 f"connect-src {' '.join(connect_src)};"
1030 "style-src 'self' 'unsafe-inline';"
1031 "img-src 'self' https://img.zeit.de https://github.asozial.org;"
1032 "frame-ancestors 'self';"
1033 "sandbox allow-downloads allow-same-origin allow-modals"
1034 " allow-popups-to-escape-sandbox allow-scripts allow-popups"
1035 " allow-top-navigation-by-user-activation allow-forms;"
1036 "report-to default;"
1037 + (
1038 f"report-uri {self.get_reporting_api_endpoint()};"
1039 if self.settings.get("REPORTING")
1040 else ""
1041 ),
1042 )
    @override
    def set_default_headers(self) -> None:
        """Set default headers."""
        # CSP first, then reset the per-response set of origin trials
        # (presumably populated by origin_trial() below — see end of method).
        self.set_csp_header()
        self.active_origin_trials = set()
        if self.settings.get("REPORTING"):
            # Wire up the Reporting API so browsers can POST CSP/COEP/NEL
            # violation reports to our endpoint.
            endpoint = self.get_reporting_api_endpoint()
            self.set_header(
                "Reporting-Endpoints",
                f'default="{endpoint}"',  # noqa: B907
            )
            # Report-To is the legacy predecessor of Reporting-Endpoints,
            # kept for older browsers.
            self.set_header(
                "Report-To",
                json.dumps(
                    {
                        "group": "default",
                        "max_age": 2592000,  # 30 days
                        "endpoints": [{"url": endpoint}],
                    },
                    option=ORJSON_OPTIONS,
                ),
            )
            # Network Error Logging, reporting to the same "default" group.
            # NOTE(review): reconstructed as inside the REPORTING branch,
            # since NEL is useless without a configured endpoint — confirm.
            self.set_header("NEL", '{"report_to":"default","max_age":2592000}')
        self.set_header("X-Content-Type-Options", "nosniff")
        self.set_header("Access-Control-Max-Age", "7200")
        # Fully permissive CORS: any origin may read any response.
        self.set_header("Access-Control-Allow-Origin", "*")
        self.set_header("Access-Control-Allow-Headers", "*")
        self.set_header(
            "Access-Control-Allow-Methods",
            ", ".join(self.get_allowed_methods()),
        )
        self.set_header("Cross-Origin-Resource-Policy", "cross-origin")
        # Opt out of several (mostly advertising-related) browser features.
        self.set_header(
            "Permissions-Policy",
            "browsing-topics=(),"
            "identity-credentials-get=(),"
            "join-ad-interest-group=(),"
            "private-state-token-issuance=(),"
            "private-state-token-redemption=(),"
            "run-ad-auction=()",
        )
        self.set_header("Referrer-Policy", "same-origin")
        self.set_header(
            "Cross-Origin-Opener-Policy", "same-origin; report-to=default"
        )
        if self.request.path == "/kaenguru-comics-alt":  # TODO: improve this
            # this page needs the laxer COEP to embed cross-origin resources
            self.set_header(
                "Cross-Origin-Embedder-Policy",
                "credentialless; report-to=default",
            )
        else:
            self.set_header(
                "Cross-Origin-Embedder-Policy",
                "require-corp; report-to=default",
            )
        if self.settings.get("HSTS"):
            self.set_header("Strict-Transport-Security", "max-age=63072000")
        if (
            onion_address := self.settings.get("ONION_ADDRESS")
        ) and not self.request.host_name.endswith(".onion"):
            # advertise the Tor hidden service to clients on the clearnet
            self.set_header(
                "Onion-Location",
                onion_address
                + self.request.path
                + (f"?{self.request.query}" if self.request.query else ""),
            )
        if self.settings.get("debug"):
            self.set_header("X-Debug", bool_to_str(True))
            # NOTE(review): the permission headers are reconstructed as
            # debug-only; verify the loop really sits inside this branch.
            for permission in Permission:
                if permission.name:
                    self.set_header(
                        f"X-Permission-{permission.name}",
                        bool_to_str(bool(self.is_authorized(permission))),
                    )
        self.set_header("Vary", "Accept, Authorization, Cookie")
        # Origin trial token; its payload decodes to
        # {"origin":"https://asozial.org:443","feature":"WebAppTabStrip",...}
        self.origin_trial(
            "AtmCLo6pBk5FVvAouMNTMnuKR6qZ59kLvYSyVFU54oq7wbRmx1cx1FhR+FivJqRPEeJAIEHXlM6L"
            "hH7UcETrWw4AAABmeyJvcmlnaW4iOiJodHRwczovL2Fzb3ppYWwub3JnOjQ0MyIsImZlYXR1cmUi"
            "OiJXZWJBcHBUYWJTdHJpcCIsImV4cGlyeSI6MTczMzE4NDAwMCwiaXNTdWJkb21haW4iOnRydWV9"
        )

    set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__
1127 @classmethod
1128 def supports_head(cls) -> bool:
1129 """Check whether this request handler supports HEAD requests."""
1130 signature = inspect.signature(cls.get)
1131 return (
1132 "head" in signature.parameters
1133 and signature.parameters["head"].kind
1134 == inspect.Parameter.KEYWORD_ONLY
1135 )
    @cached_property
    def user_settings(self) -> Options:
        """Get the user settings.

        The Options object is bound to this handler and, thanks to
        cached_property, created once per handler instance.
        """
        return Options(self)
1142 @override
1143 def write(self, chunk: str | bytes | dict[str, Any]) -> None: # noqa: D102
1144 if self._finished:
1145 raise RuntimeError("Cannot write() after finish()")
1147 self.set_content_type_header()
1149 if isinstance(chunk, dict):
1150 chunk = self.dump(chunk)
1152 if self.now.date() == date(self.now.year, 4, 27):
1153 if isinstance(chunk, bytes):
1154 with contextlib.suppress(UnicodeDecodeError):
1155 chunk = chunk.decode("UTF-8")
1156 if isinstance(chunk, str):
1157 chunk = regex.sub(
1158 r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b",
1159 lambda match: (
1160 "Stanley"
1161 if Random(match[0]).randrange(5) == self.now.year % 5
1162 else match[0]
1163 ),
1164 chunk,
1165 )
1167 super().write(chunk)
1169 write.__doc__ = _RequestHandler.write.__doc__
1171 @override
1172 def write_error(self, status_code: int, **kwargs: Any) -> None:
1173 """Render the error page."""
1174 dict_content_types: tuple[str, str] = (
1175 "application/json",
1176 "application/yaml",
1177 )
1178 all_error_content_types: tuple[str, ...] = (
1179 # text/plain as first (default), to not screw up output in terminals
1180 "text/plain",
1181 "text/html",
1182 "text/markdown",
1183 *dict_content_types,
1184 "application/vnd.asozial.dynload+json",
1185 )
1187 if self.content_type not in all_error_content_types:
1188 # don't send 406, instead default with text/plain
1189 self.handle_accept_header(all_error_content_types, strict=False)
1191 if self.content_type == "text/html":
1192 self.render( # type: ignore[unused-awaitable]
1193 "error.html",
1194 status=status_code,
1195 reason=self.get_error_message(**kwargs),
1196 description=self.get_error_page_description(status_code),
1197 is_traceback="exc_info" in kwargs
1198 and not issubclass(kwargs["exc_info"][0], HTTPError)
1199 and (
1200 self.settings.get("serve_traceback")
1201 or self.is_authorized(Permission.TRACEBACK)
1202 ),
1203 )
1204 return
1206 if self.content_type in dict_content_types:
1207 self.finish( # type: ignore[unused-awaitable]
1208 {
1209 "status": status_code,
1210 "reason": self.get_error_message(**kwargs),
1211 }
1212 )
1213 return
1215 self.finish( # type: ignore[unused-awaitable]
1216 f"{status_code} {self.get_error_message(**kwargs)}\n"
1217 )
1219 write_error.__doc__ = _RequestHandler.write_error.__doc__