Coverage for an_website/utils/base_request_handler.py: 79.508%
488 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-16 19:56 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
13# pylint: disable=too-many-lines
15"""
16The base request handler used by other modules.
18This should only contain the BaseRequestHandler class.
19"""
21from __future__ import annotations
23import contextlib
24import inspect
25import logging
26import secrets
27import sys
28import traceback
29import uuid
30from asyncio import Future
31from base64 import b64decode
32from collections.abc import Awaitable, Callable, Coroutine
33from contextvars import ContextVar
34from datetime import date, datetime, timedelta, timezone, tzinfo
35from functools import cached_property, partial, reduce
36from random import Random, choice as random_choice
37from types import TracebackType
38from typing import Any, ClassVar, Final, cast, override
39from urllib.parse import SplitResult, urlsplit, urlunsplit
40from zoneinfo import ZoneInfo
42import elasticapm
43import html2text
44import orjson as json
45import regex
46import tornado.web
47import yaml
48from accept_types import get_best_match # type: ignore[import-untyped]
49from ansi2html import Ansi2HTMLConverter
50from bs4 import BeautifulSoup
51from dateutil.easter import easter
52from elastic_transport import ApiError, TransportError
53from elasticsearch import AsyncElasticsearch
54from openmoji_dist import VERSION as OPENMOJI_VERSION
55from redis.asyncio import Redis
56from tornado.httputil import HTTPServerRequest
57from tornado.iostream import StreamClosedError
58from tornado.web import (
59 Finish,
60 GZipContentEncoding,
61 HTTPError,
62 MissingArgumentError,
63 OutputTransform,
64)
66from .. import (
67 EVENT_ELASTICSEARCH,
68 EVENT_REDIS,
69 GH_ORG_URL,
70 GH_PAGES_URL,
71 GH_REPO_URL,
72 NAME,
73 ORJSON_OPTIONS,
74 pytest_is_running,
75)
76from .decorators import is_authorized
77from .options import Options
78from .static_file_handling import FILE_HASHES_DICT, fix_static_path
79from .themes import THEMES
80from .utils import (
81 ModuleInfo,
82 Permission,
83 add_args_to_url,
84 ansi_replace,
85 apply,
86 backspace_replace,
87 bool_to_str,
88 emoji2html,
89 geoip,
90 hash_bytes,
91 is_prime,
92 ratelimit,
93 str_to_bool,
94)
LOGGER: Final = logging.getLogger(__name__)

# Content types that are textual even though they do not start with "text/".
# Used (e.g. by _finish) to decide whether output should end with a newline.
TEXT_CONTENT_TYPES: Final[set[str]] = {
    "application/javascript",
    "application/json",
    "application/vnd.asozial.dynload+json",
    "application/x-ndjson",
    "application/xml",
    "application/yaml",
}

# Context variable that always holds the request currently being handled.
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")
class _RequestHandler(tornado.web.RequestHandler):
    """Base for tornado request handlers."""

    @override
    async def _execute(
        self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
    ) -> None:
        # Publish the current request through the context variable before
        # delegating to Tornado's normal execution machinery.
        request_ctx_var.set(self.request)
        return await super()._execute(transforms, *args, **kwargs)

    # pylint: disable-next=protected-access
    _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__

    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        # Streamed request bodies are ignored by default.
        pass

    data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__

    @override
    def log_exception(
        self,
        typ: None | type[BaseException],
        value: None | BaseException,
        tb: None | TracebackType,
    ) -> None:
        if isinstance(value, HTTPError):
            # Expected errors get Tornado's default treatment.
            super().log_exception(typ, value, tb)
        elif typ is StreamClosedError:
            # Clients closing the connection early is routine; log quietly.
            LOGGER.debug(
                "Stream closed %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )
        else:
            LOGGER.error(
                "Uncaught exception %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )

    log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__
class BaseRequestHandler(_RequestHandler):
    """The base request handler used by every page and API."""

    # pylint: disable=too-many-instance-attributes, too-many-public-methods

    # URL of the Elastic APM RUM agent (minified outside dev mode)
    ELASTIC_RUM_URL: ClassVar[str] = (
        "/@elastic/apm-rum@5.12.0/dist/bundles/elastic-apm-rum"
        f".umd{'.min' if not sys.flags.dev_mode else ''}.js"
    )

    COMPUTE_ETAG: ClassVar[bool] = True
    ALLOW_COMPRESSION: ClassVar[bool] = True
    MAX_BODY_SIZE: ClassVar[None | int] = None
    ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()

    module_info: ModuleInfo
    # info about page, can be overridden in module_info
    title: str = "Das Asoziale Netzwerk"
    short_title: str = "Asoziales Netzwerk"
    description: str = "Die tolle Webseite des Asozialen Netzwerks"

    # whether render() has been called for this request
    used_render: bool = False

    active_origin_trials: set[str]
    content_type: None | str = None
    apm_script: None | str
    crawler: bool = False
    nonce: str
186 def _finish(
187 self, chunk: None | str | bytes | dict[str, Any] = None
188 ) -> Future[None]:
189 if self._finished:
190 raise RuntimeError("finish() called twice")
192 if chunk is not None:
193 self.write(chunk)
195 if ( # pylint: disable=too-many-boolean-expressions
196 (content_type := self.content_type)
197 and (
198 content_type in TEXT_CONTENT_TYPES
199 or content_type.startswith("text/")
200 or content_type.endswith(("+xml", "+json"))
201 )
202 and self._write_buffer
203 and not self._write_buffer[-1].endswith(b"\n")
204 ):
205 self.write(b"\n")
207 return super().finish()
209 @property
210 def apm_client(self) -> None | elasticapm.Client:
211 """Get the APM client from the settings."""
212 return self.settings.get("ELASTIC_APM", {}).get("CLIENT") # type: ignore[no-any-return]
214 @property
215 def apm_enabled(self) -> bool:
216 """Return whether APM is enabled."""
217 return bool(self.settings.get("ELASTIC_APM", {}).get("ENABLED"))
219 @override
220 def compute_etag(self) -> None | str:
221 """Compute ETag with Base85 encoding."""
222 if not self.COMPUTE_ETAG:
223 return None
224 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907
226 @override
227 def data_received( # noqa: D102
228 self, chunk: bytes
229 ) -> None | Awaitable[None]:
230 pass
232 @override
233 def decode_argument( # noqa: D102
234 self, value: bytes, name: str | None = None
235 ) -> str:
236 try:
237 return value.decode("UTF-8", "replace")
238 except UnicodeDecodeError as exc:
239 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}"
240 LOGGER.exception(err_msg, exc_info=exc)
241 raise HTTPError(400, err_msg) from exc
243 @property
244 def dump(self) -> Callable[[Any], str | bytes]:
245 """Get the function for dumping the output."""
246 yaml_subset = self.content_type in {
247 "application/json",
248 "application/vnd.asozial.dynload+json",
249 }
251 if self.content_type == "application/yaml":
252 if self.now.timetuple()[2:0:-1] == (1, 4):
253 yaml_subset = True
254 else:
255 return lambda spam: yaml.dump(
256 spam,
257 width=self.get_int_argument("yaml_width", 80, min_=80),
258 )
260 if yaml_subset:
261 option = ORJSON_OPTIONS
262 if self.get_bool_argument("pretty", False):
263 option |= json.OPT_INDENT_2
264 return lambda spam: json.dumps(spam, option=option)
266 return lambda spam: spam
268 @property
269 def elasticsearch(self) -> AsyncElasticsearch:
270 """
271 Get the Elasticsearch client from the settings.
273 This is None if Elasticsearch is not enabled.
274 """
275 return cast(AsyncElasticsearch, self.settings.get("ELASTICSEARCH"))
277 @property
278 def elasticsearch_prefix(self) -> str:
279 """Get the Elasticsearch prefix from the settings."""
280 return self.settings.get( # type: ignore[no-any-return]
281 "ELASTICSEARCH_PREFIX", NAME
282 )
284 @override
285 def finish( # noqa: D102
286 self, chunk: None | str | bytes | dict[Any, Any] = None
287 ) -> Future[None]:
288 as_json = self.content_type == "application/vnd.asozial.dynload+json"
289 as_plain_text = self.content_type == "text/plain"
290 as_markdown = self.content_type == "text/markdown"
292 if (
293 not isinstance(chunk, bytes | str)
294 or self.content_type == "text/html"
295 or not self.used_render
296 or not (as_json or as_plain_text or as_markdown)
297 ):
298 return self._finish(chunk)
300 chunk = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk
302 if as_markdown:
303 return self._finish(
304 f"# {self.title}\n\n"
305 + html2text.html2text(chunk, self.request.full_url()).strip()
306 )
308 soup = BeautifulSoup(chunk, features="lxml")
310 if as_plain_text:
311 return self._finish(soup.get_text("\n", True))
313 dictionary: dict[str, Any] = {
314 "url": self.fix_url(),
315 "title": self.title,
316 "short_title": (
317 self.short_title if self.title != self.short_title else None
318 ),
319 "body": "".join(
320 str(element)
321 for element in soup.find_all(name="main")[0].contents
322 ).strip(),
323 "scripts": (
324 [
325 {"script": script.string} | script.attrs
326 for script in soup.find_all("script")
327 ]
328 if soup.head
329 else []
330 ),
331 "stylesheets": (
332 [
333 stylesheet.get("href").strip()
334 for stylesheet in soup.find_all("link", rel="stylesheet")
335 ]
336 if soup.head
337 else []
338 ),
339 "css": (
340 "\n".join(style.string for style in soup.find_all("style"))
341 if soup.head
342 else ""
343 ),
344 }
346 return self._finish(dictionary)
348 finish.__doc__ = _RequestHandler.finish.__doc__
350 def finish_dict(self, **kwargs: Any) -> Future[None]:
351 """Finish the request with a dictionary."""
352 return self.finish(kwargs)
354 def fix_url(
355 self,
356 url: None | str | SplitResult = None,
357 new_path: None | str = None,
358 **query_args: None | str | bool | float,
359 ) -> str:
360 """
361 Fix a URL and return it.
363 If the URL is from another website, link to it with the redirect page,
364 otherwise just return the URL with no_3rd_party appended.
365 """
366 if url is None:
367 url = self.request.full_url()
368 if isinstance(url, str):
369 url = urlsplit(url)
370 if url.netloc and url.netloc.lower() != self.request.host.lower():
371 if (
372 not self.user_settings.ask_before_leaving
373 or not self.settings.get("REDIRECT_MODULE_LOADED")
374 ):
375 return url.geturl()
376 path = "/redirect"
377 query_args["to"] = url.geturl()
378 url = urlsplit(self.request.full_url())
379 else:
380 path = url.path if new_path is None else new_path
381 path = f"/{path.strip('/')}".lower()
382 if path == "/lolwut":
383 path = path.upper()
384 if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
385 query_args.update(
386 dict.fromkeys(self.user_settings.iter_option_names())
387 )
388 else:
389 for (
390 key,
391 value,
392 ) in self.user_settings.as_dict_with_str_values().items():
393 query_args.setdefault(key, value)
394 for key, value in self.user_settings.as_dict_with_str_values(
395 include_query_argument=False,
396 include_body_argument=self.request.path == "/einstellungen"
397 and self.get_bool_argument("save_in_cookie", False),
398 ).items():
399 if value == query_args[key]:
400 query_args[key] = None
402 return add_args_to_url(
403 urlunsplit(
404 (
405 self.request.protocol,
406 self.request.host,
407 "" if path == "/" else path,
408 url.query,
409 url.fragment,
410 )
411 ),
412 **query_args,
413 )
415 def geoip(
416 self,
417 ip: None | str = None,
418 database: str = geoip.__defaults__[0], # type: ignore[index]
419 *,
420 allow_fallback: bool = True,
421 ) -> Coroutine[None, None, None | dict[str, Any]]:
422 """Get GeoIP information."""
423 if not ip:
424 ip = self.request.remote_ip
425 if not EVENT_ELASTICSEARCH.is_set():
426 return geoip(ip, database)
427 return geoip(
428 ip, database, self.elasticsearch, allow_fallback=allow_fallback
429 )
431 @classmethod
432 def get_allowed_methods(cls) -> list[str]:
433 """Get allowed methods."""
434 methods = {"OPTIONS", *cls.ALLOWED_METHODS}
435 if "GET" in cls.ALLOWED_METHODS and cls.supports_head():
436 methods.add("HEAD")
437 return sorted(methods)
439 def get_bool_argument(
440 self,
441 name: str,
442 default: None | bool = None,
443 ) -> bool:
444 """Get an argument parsed as boolean."""
445 if default is not None:
446 return str_to_bool(self.get_argument(name, ""), default)
447 value = str(self.get_argument(name))
448 try:
449 return str_to_bool(value)
450 except ValueError as err:
451 raise HTTPError(400, f"{value} is not a boolean") from err
453 def get_display_theme(self) -> str:
454 """Get the theme currently displayed."""
455 theme = self.user_settings.theme
457 if theme == "default" and self.now.month == 12:
458 return "christmas"
460 if theme.split("_")[0] != "random":
461 return theme
463 ignore_themes = ["random", "random_dark"]
465 if self.now.month != 12:
466 ignore_themes.append("christmas")
468 if theme == "random_dark":
469 ignore_themes.extend(("light", "light_blue", "fun"))
471 return random_choice( # nosec: B311
472 tuple(theme for theme in THEMES if theme not in ignore_themes)
473 )
475 def get_error_message(self, **kwargs: Any) -> str:
476 """
477 Get the error message and return it.
479 If the serve_traceback setting is true (debug mode is activated),
480 the traceback gets returned.
481 """
482 if "exc_info" in kwargs and not issubclass(
483 kwargs["exc_info"][0], HTTPError
484 ):
485 if self.settings.get("serve_traceback") or self.is_authorized(
486 Permission.TRACEBACK
487 ):
488 return "".join(
489 traceback.format_exception(*kwargs["exc_info"])
490 ).strip()
491 return "".join(
492 traceback.format_exception_only(*kwargs["exc_info"][:2])
493 ).strip()
494 if "exc_info" in kwargs and issubclass(
495 kwargs["exc_info"][0], MissingArgumentError
496 ):
497 return cast(str, kwargs["exc_info"][1].log_message)
498 return str(self._reason)
500 def get_error_page_description(self, status_code: int) -> str:
501 """Get the description for the error page."""
502 # pylint: disable=too-many-return-statements
503 # https://developer.mozilla.org/docs/Web/HTTP/Status
504 if 100 <= status_code <= 199:
505 return "Hier gibt es eine total wichtige Information."
506 if 200 <= status_code <= 299:
507 return "Hier ist alles super! 🎶🎶"
508 if 300 <= status_code <= 399:
509 return "Eine Umleitung ist eingerichtet."
510 if 400 <= status_code <= 499:
511 if status_code == 404:
512 return f"{self.request.path} wurde nicht gefunden."
513 if status_code == 451:
514 return "Hier wäre bestimmt geiler Scheiß."
515 return "Ein Client-Fehler ist aufgetreten."
516 if 500 <= status_code <= 599:
517 return "Ein Server-Fehler ist aufgetreten."
518 raise ValueError(
519 f"{status_code} is not a valid HTTP response status code."
520 )
522 def get_int_argument(
523 self,
524 name: str,
525 default: None | int = None,
526 *,
527 max_: None | int = None,
528 min_: None | int = None,
529 ) -> int:
530 """Get an argument parsed as integer."""
531 if default is None:
532 str_value = self.get_argument(name)
533 try:
534 value = int(str_value, base=0)
535 except ValueError as err:
536 raise HTTPError(400, f"{str_value} is not an integer") from err
537 elif self.get_argument(name, ""):
538 try:
539 value = int(self.get_argument(name), base=0)
540 except ValueError:
541 value = default
542 else:
543 value = default
545 if max_ is not None:
546 value = min(max_, value)
547 if min_ is not None:
548 value = max(min_, value)
550 return value
552 def get_module_infos(self) -> tuple[ModuleInfo, ...]:
553 """Get the module infos."""
554 return self.settings.get("MODULE_INFOS") or ()
556 def get_reporting_api_endpoint(self) -> None | str:
557 """Get the endpoint for the Reporting API™️."""
558 if not self.settings.get("REPORTING"):
559 return None
560 endpoint = self.settings.get("REPORTING_ENDPOINT")
562 if not endpoint or not endpoint.startswith("/"):
563 return endpoint
565 return f"{self.request.protocol}://{self.request.host}{endpoint}"
567 @override
568 def get_template_namespace(self) -> dict[str, Any]:
569 """
570 Add useful things to the template namespace and return it.
572 They are mostly needed by most of the pages (like title,
573 description and no_3rd_party).
574 """
575 namespace = super().get_template_namespace()
576 ansi2html = partial(
577 Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
578 )
579 namespace.update(self.user_settings.as_dict())
580 namespace.update(
581 ansi2html=partial(
582 reduce, apply, (ansi2html, ansi_replace, backspace_replace)
583 ),
584 apm_script=(
585 self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
586 if self.apm_enabled
587 else None
588 ),
589 as_html=self.content_type == "text/html",
590 c=self.now.date() == date(self.now.year, 4, 1)
591 or str_to_bool(self.get_cookie("c", "f") or "f", False),
592 canonical_url=self.fix_url(
593 self.request.full_url().upper()
594 if self.request.path.upper().startswith("/LOLWUT")
595 else self.request.full_url().lower()
596 ).split("?")[0],
597 description=self.description,
598 display_theme=self.get_display_theme(),
599 elastic_rum_url=self.ELASTIC_RUM_URL,
600 fix_static=lambda path: self.fix_url(fix_static_path(path)),
601 fix_url=self.fix_url,
602 emoji2html=(
603 emoji2html
604 if self.user_settings.openmoji == "img"
605 else (
606 (lambda emoji: f'<span class="openmoji">{emoji}</span>')
607 if self.user_settings.openmoji
608 else (lambda emoji: emoji)
609 )
610 ),
611 form_appendix=self.user_settings.get_form_appendix(),
612 GH_ORG_URL=GH_ORG_URL,
613 GH_PAGES_URL=GH_PAGES_URL,
614 GH_REPO_URL=GH_REPO_URL,
615 keywords="Asoziales Netzwerk, Känguru-Chroniken"
616 + (
617 f", {self.module_info.get_keywords_as_str(self.request.path)}"
618 if self.module_info # type: ignore[truthy-bool]
619 else ""
620 ),
621 lang="de", # TODO: add language support
622 nonce=self.nonce,
623 now=self.now,
624 openmoji_version=OPENMOJI_VERSION,
625 settings=self.settings,
626 short_title=self.short_title,
627 testing=pytest_is_running(),
628 title=self.title,
629 )
630 namespace.update(
631 {
632 "🥚": timedelta()
633 <= self.now.date() - easter(self.now.year)
634 < timedelta(days=2),
635 "🦘": is_prime(self.now.microsecond),
636 }
637 )
638 return namespace
640 async def get_time(self) -> datetime:
641 """Get the start time of the request in the users' timezone."""
642 tz: tzinfo = timezone.utc
643 try:
644 geoip = await self.geoip() # pylint: disable=redefined-outer-name
645 except (ApiError, TransportError):
646 LOGGER.exception("Elasticsearch request failed")
647 if self.apm_client:
648 self.apm_client.capture_exception() # type: ignore[no-untyped-call]
649 else:
650 if geoip and "timezone" in geoip:
651 tz = ZoneInfo(geoip["timezone"])
652 return datetime.fromtimestamp(
653 self.request._start_time, tz=tz # pylint: disable=protected-access
654 )
656 def get_user_id(self) -> str:
657 """Get the user id saved in the cookie or create one."""
658 cookie = self.get_secure_cookie(
659 "user_id",
660 max_age_days=90,
661 min_version=2,
662 )
664 user_id = cookie.decode("UTF-8") if cookie else str(uuid.uuid4())
666 if not self.get_secure_cookie( # save it in cookie or reset expiry date
667 "user_id", max_age_days=30, min_version=2
668 ):
669 self.set_secure_cookie(
670 "user_id",
671 user_id,
672 expires_days=90,
673 path="/",
674 samesite="Strict",
675 )
677 return user_id
679 def handle_accept_header( # pylint: disable=inconsistent-return-statements
680 self, possible_content_types: tuple[str, ...], strict: bool = True
681 ) -> None:
682 """Handle the Accept header and set `self.content_type`."""
683 if not possible_content_types:
684 return
685 content_type = get_best_match(
686 self.request.headers.get("Accept") or "*/*",
687 possible_content_types,
688 )
689 if content_type is None:
690 if strict:
691 return self.handle_not_acceptable(possible_content_types)
692 content_type = possible_content_types[0]
693 self.content_type = content_type
694 self.set_content_type_header()
696 def handle_not_acceptable(
697 self, possible_content_types: tuple[str, ...]
698 ) -> None:
699 """Only call this if we cannot respect the Accept header."""
700 self.clear_header("Content-Type")
701 self.set_status(406)
702 raise Finish("\n".join(possible_content_types) + "\n")
704 def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]:
705 """Handle HEAD requests."""
706 if self.get.__module__ == "tornado.web":
707 raise HTTPError(405)
708 if not self.supports_head():
709 raise HTTPError(501)
711 kwargs["head"] = True
712 return self.get(*args, **kwargs)
714 @override
715 def initialize(
716 self,
717 *,
718 module_info: ModuleInfo,
719 # default is true, because then empty args dicts are
720 # enough to specify that the defaults should be used
721 default_title: bool = True,
722 default_description: bool = True,
723 ) -> None:
724 """
725 Get title and description from the kwargs.
727 If title and description are present in the kwargs,
728 then they override self.title and self.description.
729 """
730 self.module_info = module_info
731 if not default_title:
732 page_info = self.module_info.get_page_info(self.request.path)
733 self.title = page_info.name
734 self.short_title = page_info.short_name or self.title
735 if not default_description:
736 self.description = self.module_info.get_page_info(
737 self.request.path
738 ).description
740 def is_authorized(
741 self, permission: Permission, allow_cookie_auth: bool = True
742 ) -> bool | None:
743 """Check whether the request is authorized."""
744 return is_authorized(self, permission, allow_cookie_auth)
746 @cached_property
747 def now(self) -> datetime:
748 """Get the current time."""
749 # pylint: disable=method-hidden
750 if pytest_is_running():
751 raise AssertionError("Now accessed before it was set")
752 if self.request.method in self.SUPPORTED_METHODS:
753 LOGGER.error("Now accessed before it was set", stacklevel=3)
754 return datetime.fromtimestamp(
755 self.request._start_time, # pylint: disable=protected-access
756 tz=timezone.utc,
757 )
759 @override
760 async def options(self, *args: Any, **kwargs: Any) -> None:
761 """Handle OPTIONS requests."""
762 # pylint: disable=unused-argument
763 self.set_header("Allow", ", ".join(self.get_allowed_methods()))
764 self.set_status(204)
765 await self.finish()
767 def origin_trial(self, token: bytes | str) -> bool:
768 """Enable an experimental feature."""
769 # pylint: disable=protected-access
770 payload = json.loads(b64decode(token)[69:])
771 if payload["feature"] in self.active_origin_trials:
772 return True
773 origin = urlsplit(payload["origin"])
774 url = urlsplit(self.request.full_url())
775 if url.port is None and url.scheme in {"http", "https"}:
776 url = url._replace(
777 netloc=f"{url.hostname}:{443 if url.scheme == 'https' else 80}"
778 )
779 if self.request._start_time > payload["expiry"]:
780 return False
781 if url.scheme != origin.scheme:
782 return False
783 if url.netloc != origin.netloc and not (
784 payload.get("isSubdomain")
785 and url.netloc.endswith(f".{origin.netloc}")
786 ):
787 return False
788 self.add_header("Origin-Trial", token)
789 self.active_origin_trials.add(payload["feature"])
790 return True
792 @override
793 async def prepare(self) -> None:
794 """Check authorization and call self.ratelimit()."""
795 # pylint: disable=invalid-overridden-method
796 self.now = await self.get_time()
798 if not self.ALLOW_COMPRESSION:
799 for transform in self._transforms:
800 if isinstance(transform, GZipContentEncoding):
801 # pylint: disable=protected-access
802 transform._gzipping = False
804 if crawler_secret := self.settings.get("CRAWLER_SECRET"):
805 self.crawler = crawler_secret in self.request.headers.get(
806 "User-Agent", ""
807 )
809 self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)
811 if (
812 self.request.method in {"GET", "HEAD"}
813 and self.redirect_to_canonical_domain()
814 ):
815 return
817 if self.request.method == "GET" and (
818 days := Random(self.now.timestamp()).randint(0, 31337)
819 ) in {
820 69,
821 420,
822 1337,
823 31337,
824 }:
825 self.set_cookie("c", "s", expires_days=days / 24, path="/")
827 if self.request.method != "OPTIONS":
828 if (
829 self.MAX_BODY_SIZE is not None
830 and len(self.request.body) > self.MAX_BODY_SIZE
831 ):
832 LOGGER.warning(
833 "%s > MAX_BODY_SIZE (%s)",
834 len(self.request.body),
835 self.MAX_BODY_SIZE,
836 )
837 raise HTTPError(413)
839 if not await self.ratelimit(True):
840 await self.ratelimit()
842 async def ratelimit(self, global_ratelimit: bool = False) -> bool:
843 """Take b1nzy to space using Redis."""
844 if (
845 not self.settings.get("RATELIMITS")
846 or self.request.method == "OPTIONS"
847 or self.is_authorized(Permission.RATELIMITS)
848 or self.crawler
849 ):
850 return False
852 if not EVENT_REDIS.is_set():
853 LOGGER.warning(
854 (
855 "Ratelimits are enabled, but Redis is not available. "
856 "This can happen shortly after starting the website."
857 ),
858 )
859 raise HTTPError(503)
861 if global_ratelimit:
862 ratelimited, headers = await ratelimit(
863 self.redis,
864 self.redis_prefix,
865 str(self.request.remote_ip),
866 bucket=None,
867 max_burst=99, # limit = 100
868 count_per_period=20, # 20 requests per second
869 period=1,
870 tokens=10 if self.settings.get("UNDER_ATTACK") else 1,
871 )
872 else:
873 method = (
874 "GET" if self.request.method == "HEAD" else self.request.method
875 )
876 if not (limit := getattr(self, f"RATELIMIT_{method}_LIMIT", 0)):
877 return False
878 ratelimited, headers = await ratelimit(
879 self.redis,
880 self.redis_prefix,
881 str(self.request.remote_ip),
882 bucket=getattr(
883 self,
884 f"RATELIMIT_{method}_BUCKET",
885 self.__class__.__name__.lower(),
886 ),
887 max_burst=limit - 1,
888 count_per_period=getattr( # request count per period
889 self,
890 f"RATELIMIT_{method}_COUNT_PER_PERIOD",
891 30,
892 ),
893 period=getattr(
894 self, f"RATELIMIT_{method}_PERIOD", 60 # period in seconds
895 ),
896 tokens=1 if self.request.method != "HEAD" else 0,
897 )
899 for header, value in headers.items():
900 self.set_header(header, value)
902 if ratelimited:
903 if self.now.date() == date(self.now.year, 4, 20):
904 self.set_status(420)
905 self.write_error(420)
906 else:
907 self.set_status(429)
908 self.write_error(429)
910 return ratelimited
912 def redirect_to_canonical_domain(self) -> bool:
913 """Redirect to the canonical domain."""
914 if (
915 not (domain := self.settings.get("DOMAIN"))
916 or not self.request.headers.get("Host")
917 or self.request.host_name == domain
918 or self.request.host_name.endswith((".onion", ".i2p"))
919 or regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path)
920 ):
921 return False
922 port = urlsplit(f"//{self.request.headers['Host']}").port
923 self.redirect(
924 urlsplit(self.request.full_url())
925 ._replace(netloc=f"{domain}:{port}" if port else domain)
926 .geturl(),
927 permanent=True,
928 )
929 return True
931 @property
932 def redis(self) -> Redis[str]:
933 """
934 Get the Redis client from the settings.
936 This is None if Redis is not enabled.
937 """
938 return cast("Redis[str]", self.settings.get("REDIS"))
940 @property
941 def redis_prefix(self) -> str:
942 """Get the Redis prefix from the settings."""
943 return self.settings.get( # type: ignore[no-any-return]
944 "REDIS_PREFIX", NAME
945 )
947 @override
948 def render( # noqa: D102
949 self, template_name: str, **kwargs: Any
950 ) -> Future[None]:
951 self.used_render = True
952 return super().render(template_name, **kwargs)
954 render.__doc__ = _RequestHandler.render.__doc__
956 def set_content_type_header(self) -> None:
957 """Set the Content-Type header based on `self.content_type`."""
958 if str(self.content_type).startswith("text/"): # RFC 2616 (3.7.1)
959 self.set_header(
960 "Content-Type", f"{self.content_type};charset=utf-8"
961 )
962 elif self.content_type is not None:
963 self.set_header("Content-Type", self.content_type)
965 @override
966 def set_cookie( # noqa: D102 # pylint: disable=too-many-arguments
967 self,
968 name: str,
969 value: str | bytes,
970 domain: None | str = None,
971 expires: None | float | tuple[int, ...] | datetime = None,
972 path: str = "/",
973 expires_days: None | float = 400, # changed
974 **kwargs: Any,
975 ) -> None:
976 if "samesite" not in kwargs:
977 # default for same site should be strict
978 kwargs["samesite"] = "Strict"
980 super().set_cookie(
981 name,
982 value,
983 domain,
984 expires,
985 path,
986 expires_days,
987 **kwargs,
988 )
990 set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__
992 def set_csp_header(self) -> None:
993 """Set the Content-Security-Policy header."""
994 self.nonce = secrets.token_urlsafe(16)
996 script_src = ["'self'", f"'nonce-{self.nonce}'"]
998 if (
999 self.apm_enabled
1000 and "INLINE_SCRIPT_HASH" in self.settings["ELASTIC_APM"]
1001 ):
1002 script_src.extend(
1003 (
1004 f"'sha256-{self.settings['ELASTIC_APM']['INLINE_SCRIPT_HASH']}'",
1005 "'unsafe-inline'", # for browsers that don't support hash
1006 )
1007 )
1009 connect_src = ["'self'"]
1011 if self.apm_enabled and "SERVER_URL" in self.settings["ELASTIC_APM"]:
1012 rum_server_url = self.settings["ELASTIC_APM"].get("RUM_SERVER_URL")
1013 if rum_server_url:
1014 # the RUM agent needs to connect to rum_server_url
1015 connect_src.append(rum_server_url)
1016 elif rum_server_url is None:
1017 # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
1018 connect_src.append(self.settings["ELASTIC_APM"]["SERVER_URL"])
1020 connect_src.append( # fix for older browsers
1021 ("wss" if self.request.protocol == "https" else "ws")
1022 + f"://{self.request.host}"
1023 )
1025 self.set_header(
1026 "Content-Security-Policy",
1027 "default-src 'self';"
1028 f"script-src {' '.join(script_src)};"
1029 f"connect-src {' '.join(connect_src)};"
1030 "style-src 'self' 'unsafe-inline';"
1031 "img-src 'self' https://img.zeit.de https://github.asozial.org;"
1032 "frame-ancestors 'self';"
1033 "sandbox allow-downloads allow-same-origin allow-modals"
1034 " allow-popups-to-escape-sandbox allow-scripts allow-popups"
1035 " allow-top-navigation-by-user-activation allow-forms;"
1036 "report-to default;"
1037 + (
1038 f"report-uri {self.get_reporting_api_endpoint()};"
1039 if self.settings.get("REPORTING")
1040 else ""
1041 ),
1042 )
    @override
    def set_default_headers(self) -> None:
        """Set default headers."""
        # CSP first, then reset the per-response set of origin trials
        # (presumably populated by origin_trial() below — see end of method).
        self.set_csp_header()
        self.active_origin_trials = set()
        if self.settings.get("REPORTING"):
            # Wire up the Reporting API so browsers can POST CSP/COEP/NEL
            # violation reports to our endpoint.
            endpoint = self.get_reporting_api_endpoint()
            self.set_header(
                "Reporting-Endpoints",
                f'default="{endpoint}"',  # noqa: B907
            )
            # Report-To is the legacy predecessor of Reporting-Endpoints,
            # kept for older browsers.
            self.set_header(
                "Report-To",
                json.dumps(
                    {
                        "group": "default",
                        "max_age": 2592000,  # 30 days
                        "endpoints": [{"url": endpoint}],
                    },
                    option=ORJSON_OPTIONS,
                ),
            )
            # Network Error Logging, reporting to the same "default" group.
            # NOTE(review): reconstructed as inside the REPORTING branch,
            # since NEL is useless without a configured endpoint — confirm.
            self.set_header("NEL", '{"report_to":"default","max_age":2592000}')
        self.set_header("X-Content-Type-Options", "nosniff")
        self.set_header("Access-Control-Max-Age", "7200")
        # Fully permissive CORS: any origin may read any response.
        self.set_header("Access-Control-Allow-Origin", "*")
        self.set_header("Access-Control-Allow-Headers", "*")
        self.set_header(
            "Access-Control-Allow-Methods",
            ", ".join(self.get_allowed_methods()),
        )
        self.set_header("Cross-Origin-Resource-Policy", "cross-origin")
        # Opt out of several (mostly advertising-related) browser features.
        self.set_header(
            "Permissions-Policy",
            "browsing-topics=(),"
            "identity-credentials-get=(),"
            "join-ad-interest-group=(),"
            "private-state-token-issuance=(),"
            "private-state-token-redemption=(),"
            "run-ad-auction=()",
        )
        self.set_header("Referrer-Policy", "same-origin")
        self.set_header(
            "Cross-Origin-Opener-Policy", "same-origin; report-to=default"
        )
        if self.request.path == "/kaenguru-comics-alt":  # TODO: improve this
            # this page needs the laxer COEP to embed cross-origin resources
            self.set_header(
                "Cross-Origin-Embedder-Policy",
                "credentialless; report-to=default",
            )
        else:
            self.set_header(
                "Cross-Origin-Embedder-Policy",
                "require-corp; report-to=default",
            )
        if self.settings.get("HSTS"):
            self.set_header("Strict-Transport-Security", "max-age=63072000")
        if (
            onion_address := self.settings.get("ONION_ADDRESS")
        ) and not self.request.host_name.endswith(".onion"):
            # advertise the Tor hidden service to clients on the clearnet
            self.set_header(
                "Onion-Location",
                onion_address
                + self.request.path
                + (f"?{self.request.query}" if self.request.query else ""),
            )
        if self.settings.get("debug"):
            self.set_header("X-Debug", bool_to_str(True))
            # NOTE(review): the permission headers are reconstructed as
            # debug-only; verify the loop really sits inside this branch.
            for permission in Permission:
                if permission.name:
                    self.set_header(
                        f"X-Permission-{permission.name}",
                        bool_to_str(bool(self.is_authorized(permission))),
                    )
        self.set_header("Vary", "Accept, Authorization, Cookie")
        # Origin trial token; its payload decodes to
        # {"origin":"https://asozial.org:443","feature":"WebAppTabStrip",...}
        self.origin_trial(
            "AtmCLo6pBk5FVvAouMNTMnuKR6qZ59kLvYSyVFU54oq7wbRmx1cx1FhR+FivJqRPEeJAIEHXlM6L"
            "hH7UcETrWw4AAABmeyJvcmlnaW4iOiJodHRwczovL2Fzb3ppYWwub3JnOjQ0MyIsImZlYXR1cmUi"
            "OiJXZWJBcHBUYWJTdHJpcCIsImV4cGlyeSI6MTczMzE4NDAwMCwiaXNTdWJkb21haW4iOnRydWV9"
        )

    set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__
1127 @classmethod
1128 def supports_head(cls) -> bool:
1129 """Check whether this request handler supports HEAD requests."""
1130 signature = inspect.signature(cls.get)
1131 return (
1132 "head" in signature.parameters
1133 and signature.parameters["head"].kind
1134 == inspect.Parameter.KEYWORD_ONLY
1135 )
    @cached_property
    def user_settings(self) -> Options:
        """Get the user settings.

        The Options object is bound to this handler and, thanks to
        cached_property, created once per handler instance.
        """
        return Options(self)
1142 @override
1143 def write(self, chunk: str | bytes | dict[str, Any]) -> None: # noqa: D102
1144 if self._finished:
1145 raise RuntimeError("Cannot write() after finish()")
1147 self.set_content_type_header()
1149 if isinstance(chunk, dict):
1150 chunk = self.dump(chunk)
1152 if self.now.date() == date(self.now.year, 4, 27):
1153 if isinstance(chunk, bytes):
1154 with contextlib.suppress(UnicodeDecodeError):
1155 chunk = chunk.decode("UTF-8")
1156 if isinstance(chunk, str):
1157 chunk = regex.sub(
1158 r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b",
1159 lambda match: (
1160 "Stanley"
1161 if Random(match[0]).randrange(5) == self.now.year % 5
1162 else match[0]
1163 ),
1164 chunk,
1165 )
1167 super().write(chunk)
1169 write.__doc__ = _RequestHandler.write.__doc__
1171 @override
1172 def write_error(self, status_code: int, **kwargs: Any) -> None:
1173 """Render the error page."""
1174 dict_content_types: tuple[str, str] = (
1175 "application/json",
1176 "application/yaml",
1177 )
1178 all_error_content_types: tuple[str, ...] = (
1179 # text/plain as first (default), to not screw up output in terminals
1180 "text/plain",
1181 "text/html",
1182 "text/markdown",
1183 *dict_content_types,
1184 "application/vnd.asozial.dynload+json",
1185 )
1187 if self.content_type not in all_error_content_types:
1188 # don't send 406, instead default with text/plain
1189 self.handle_accept_header(all_error_content_types, strict=False)
1191 if self.content_type == "text/html":
1192 self.render( # type: ignore[unused-awaitable]
1193 "error.html",
1194 status=status_code,
1195 reason=self.get_error_message(**kwargs),
1196 description=self.get_error_page_description(status_code),
1197 is_traceback="exc_info" in kwargs
1198 and not issubclass(kwargs["exc_info"][0], HTTPError)
1199 and (
1200 self.settings.get("serve_traceback")
1201 or self.is_authorized(Permission.TRACEBACK)
1202 ),
1203 )
1204 return
1206 if self.content_type in dict_content_types:
1207 self.finish( # type: ignore[unused-awaitable]
1208 {
1209 "status": status_code,
1210 "reason": self.get_error_message(**kwargs),
1211 }
1212 )
1213 return
1215 self.finish( # type: ignore[unused-awaitable]
1216 f"{status_code} {self.get_error_message(**kwargs)}\n"
1217 )
1219 write_error.__doc__ = _RequestHandler.write_error.__doc__