Coverage for an_website / utils / base_request_handler.py: 79.032%

496 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 19:37 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13# pylint: disable=too-many-lines 

14 

15""" 

16The base request handler used by other modules. 

17 

18This should only contain the BaseRequestHandler class. 

19""" 

20 

21from __future__ import annotations 

22 

23import contextlib 

24import inspect 

25import logging 

26import secrets 

27import sys 

28import traceback 

29import uuid 

30from asyncio import Future 

31from base64 import b64decode 

32from collections.abc import Awaitable, Callable, Coroutine, Mapping 

33from contextvars import ContextVar 

34from datetime import date, datetime, timedelta, timezone, tzinfo 

35from functools import cached_property, partial, reduce 

36from random import Random, choice as random_choice 

37from types import TracebackType 

38from typing import Any, ClassVar, Final, cast, override 

39from urllib.parse import SplitResult, urlsplit, urlunsplit 

40from zoneinfo import ZoneInfo 

41 

42import elasticapm 

43import html2text 

44import orjson as json 

45import regex 

46import tornado.web 

47import yaml 

48from accept_types import get_best_match # type: ignore[import-untyped] 

49from ansi2html import Ansi2HTMLConverter 

50from bs4 import BeautifulSoup 

51from dateutil.easter import easter 

52from elastic_transport import ApiError, TransportError 

53from elasticsearch import AsyncElasticsearch 

54from openmoji_dist import VERSION as OPENMOJI_VERSION 

55from redis.asyncio import Redis 

56from tornado.httputil import HTTPServerRequest 

57from tornado.iostream import StreamClosedError 

58from tornado.web import ( 

59 Finish, 

60 GZipContentEncoding, 

61 HTTPError, 

62 MissingArgumentError, 

63 OutputTransform, 

64) 

65 

66from .. import ( 

67 EVENT_ELASTICSEARCH, 

68 EVENT_REDIS, 

69 GH_ORG_URL, 

70 GH_PAGES_URL, 

71 GH_REPO_URL, 

72 NAME, 

73 ORJSON_OPTIONS, 

74 pytest_is_running, 

75) 

76from .decorators import is_authorized 

77from .options import ColourScheme, Options 

78from .static_file_handling import FILE_HASHES_DICT, fix_static_path 

79from .themes import THEMES 

80from .utils import ( 

81 ModuleInfo, 

82 Permission, 

83 add_args_to_url, 

84 ansi_replace, 

85 apply, 

86 backspace_replace, 

87 bool_to_str, 

88 emoji2html, 

89 geoip, 

90 hash_bytes, 

91 is_prime, 

92 ratelimit, 

93 str_to_bool, 

94) 

95 

# Logger for this module.
LOGGER: Final = logging.getLogger(__name__)

# Content types that do not start with "text/" but are still treated as
# text when BaseRequestHandler._finish() decides whether to append a
# trailing newline to the response.
TEXT_CONTENT_TYPES: Final[set[str]] = {
    "application/javascript",
    "application/json",
    "application/vnd.asozial.dynload+json",
    "application/x-ndjson",
    "application/xml",
    "application/yaml",
}

# NOTE(review): not referenced in the visible part of this file; presumably
# the pool of values for an "X-Clacks-Overhead" header -- confirm.
CLACKS_OVERHEADS = (
    "GNU Aaron Swartz",
    "GNU Carol Angie Deborah Maltesi",
    "GNU Charlotte Angie",
    "GNU Terry Pratchett",
)

# Holds the request that is currently being executed;
# it gets set in _RequestHandler._execute().
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar("current_request")

115 

116 

class _RequestHandler(tornado.web.RequestHandler):
    """Base for Tornado request handlers."""

    # Whether the User-Agent of the request contains the configured
    # CRAWLER_SECRET; updated in prepare() and read by ratelimit().
    crawler: bool = False

121 

122 @override 

123 async def _execute( 

124 self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes 

125 ) -> None: 

126 request_ctx_var.set(self.request) 

127 return await super()._execute(transforms, *args, **kwargs) 

128 

129 # pylint: disable-next=protected-access 

130 _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__ 

131 

132 @property 

133 def apm_client(self) -> None | elasticapm.Client: 

134 """Get the APM client from the settings.""" 

135 return self.settings.get("ELASTIC_APM", {}).get("CLIENT") # type: ignore[no-any-return] 

136 

137 @property 

138 def apm_enabled(self) -> bool: 

139 """Return whether APM is enabled.""" 

140 return bool(self.settings.get("ELASTIC_APM", {}).get("ENABLED")) 

141 

142 @override 

143 def data_received( # noqa: D102 

144 self, chunk: bytes 

145 ) -> None | Awaitable[None]: 

146 pass 

147 

148 data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__ 

149 

150 @property 

151 def elasticsearch(self) -> AsyncElasticsearch: 

152 """ 

153 Get the Elasticsearch client from the settings. 

154 

155 This is None if Elasticsearch is not enabled. 

156 """ 

157 return cast(AsyncElasticsearch, self.settings.get("ELASTICSEARCH")) 

158 

159 @property 

160 def elasticsearch_prefix(self) -> str: 

161 """Get the Elasticsearch prefix from the settings.""" 

162 return self.settings.get( # type: ignore[no-any-return] 

163 "ELASTICSEARCH_PREFIX", NAME 

164 ) 

165 

166 def geoip( 

167 self, 

168 ip: None | str = None, 

169 database: str = geoip.__defaults__[0], # type: ignore[index] 

170 *, 

171 allow_fallback: bool = True, 

172 ) -> Coroutine[None, None, None | dict[str, Any]]: 

173 """Get GeoIP information.""" 

174 if not ip: 

175 ip = self.request.remote_ip 

176 if not EVENT_ELASTICSEARCH.is_set(): 

177 return geoip(ip, database) 

178 return geoip( 

179 ip, database, self.elasticsearch, allow_fallback=allow_fallback 

180 ) 

181 

182 async def get_time(self) -> datetime: 

183 """Get the start time of the request in the users' timezone.""" 

184 tz: tzinfo = timezone.utc 

185 try: 

186 geoip = await self.geoip() # pylint: disable=redefined-outer-name 

187 except (ApiError, TransportError): 

188 LOGGER.exception("Elasticsearch request failed") 

189 if self.apm_client: 

190 self.apm_client.capture_exception() # type: ignore[no-untyped-call] 

191 else: 

192 if geoip and "timezone" in geoip: 

193 tz = ZoneInfo(geoip["timezone"]) 

194 return datetime.fromtimestamp( 

195 self.request._start_time, tz=tz # pylint: disable=protected-access 

196 ) 

197 

198 def is_authorized( 

199 self, permission: Permission, allow_cookie_auth: bool = True 

200 ) -> bool | None: 

201 """Check whether the request is authorized.""" 

202 return is_authorized(self, permission, allow_cookie_auth) 

203 

204 @override 

205 def log_exception( 

206 self, 

207 typ: None | type[BaseException], 

208 value: None | BaseException, 

209 tb: None | TracebackType, 

210 ) -> None: 

211 if isinstance(value, HTTPError): 

212 super().log_exception(typ, value, tb) 

213 elif typ is StreamClosedError: 

214 LOGGER.debug( 

215 "Stream closed %s", 

216 self._request_summary(), 

217 exc_info=(typ, value, tb), # type: ignore[arg-type] 

218 ) 

219 else: 

220 LOGGER.error( 

221 "Uncaught exception %s", 

222 self._request_summary(), 

223 exc_info=(typ, value, tb), # type: ignore[arg-type] 

224 ) 

225 

226 log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__ 

227 

228 @cached_property 

229 def now(self) -> datetime: 

230 """Get the current time.""" 

231 # pylint: disable=method-hidden 

232 if pytest_is_running(): 

233 raise AssertionError("Now accessed before it was set") 

234 # if self.request.method in self.SUPPORTED_METHODS: # Why? 

235 LOGGER.error("Now accessed before it was set", stacklevel=3) 

236 return self.now_utc 

237 

238 @cached_property 

239 def now_utc(self) -> datetime: 

240 """Get the current time in the correct timezone.""" 

241 return datetime.fromtimestamp( 

242 self.request._start_time, # pylint: disable=protected-access 

243 tz=timezone.utc, 

244 ) 

245 

246 @override 

247 async def prepare(self) -> None: 

248 """Check authorization and call self.ratelimit().""" 

249 # pylint: disable=invalid-overridden-method 

250 self.now = await self.get_time() 

251 

252 if crawler_secret := self.settings.get("CRAWLER_SECRET"): 

253 self.crawler = crawler_secret in self.request.headers.get( 

254 "User-Agent", "" 

255 ) 

256 

257 if ( 

258 self.request.method in {"GET", "HEAD"} 

259 and self.redirect_to_canonical_domain() 

260 ): 

261 return 

262 

263 if self.request.method != "OPTIONS" and not await self.ratelimit(True): 

264 await self.ratelimit() 

265 

266 async def ratelimit(self, global_ratelimit: bool = False) -> bool: 

267 """Take b1nzy to space using Redis.""" 

268 if ( 

269 not self.settings.get("RATELIMITS") 

270 or self.request.method == "OPTIONS" 

271 or self.is_authorized(Permission.RATELIMITS) 

272 or self.crawler 

273 ): 

274 return False 

275 

276 if not EVENT_REDIS.is_set(): 

277 LOGGER.warning( 

278 ( 

279 "Ratelimits are enabled, but Redis is not available. " 

280 "This can happen shortly after starting the website." 

281 ), 

282 ) 

283 raise HTTPError(503) 

284 

285 if global_ratelimit: # TODO: add to _RequestHandler 

286 ratelimited, headers = await ratelimit( 

287 self.redis, 

288 self.redis_prefix, 

289 str(self.request.remote_ip), 

290 bucket=None, 

291 max_burst=99, # limit = 100 

292 count_per_period=20, # 20 requests per second 

293 period=1, 

294 tokens=10 if self.settings.get("UNDER_ATTACK") else 1, 

295 ) 

296 else: 

297 method = ( 

298 "GET" if self.request.method == "HEAD" else self.request.method 

299 ) 

300 if not (limit := getattr(self, f"RATELIMIT_{method}_LIMIT", 0)): 

301 return False 

302 ratelimited, headers = await ratelimit( 

303 self.redis, 

304 self.redis_prefix, 

305 str(self.request.remote_ip), 

306 bucket=getattr( 

307 self, 

308 f"RATELIMIT_{method}_BUCKET", 

309 self.__class__.__name__.lower(), 

310 ), 

311 max_burst=limit - 1, 

312 count_per_period=getattr( # request count per period 

313 self, 

314 f"RATELIMIT_{method}_COUNT_PER_PERIOD", 

315 30, 

316 ), 

317 period=getattr( 

318 self, f"RATELIMIT_{method}_PERIOD", 60 # period in seconds 

319 ), 

320 tokens=1 if self.request.method != "HEAD" else 0, 

321 ) 

322 

323 for header, value in headers.items(): 

324 self.set_header(header, value) 

325 

326 if ratelimited: 

327 if self.now.date() == date(self.now.year, 4, 20): 

328 self.set_status(420) 

329 self.write_error(420) 

330 else: 

331 self.set_status(429) 

332 self.write_error(429) 

333 

334 return ratelimited 

335 

336 def redirect_to_canonical_domain(self) -> bool: 

337 """Redirect to the canonical domain.""" 

338 if ( 

339 not (domain := self.settings.get("DOMAIN")) 

340 or not self.request.headers.get("Host") 

341 or self.request.host_name == domain 

342 or self.request.host_name.endswith((".onion", ".i2p")) 

343 or regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path) 

344 ): 

345 return False 

346 port = urlsplit(f"//{self.request.headers['Host']}").port 

347 self.redirect( 

348 urlsplit(self.request.full_url()) 

349 ._replace(netloc=f"{domain}:{port}" if port else domain) 

350 .geturl(), 

351 permanent=True, 

352 ) 

353 return True 

354 

355 @property 

356 def redis(self) -> Redis[str]: 

357 """ 

358 Get the Redis client from the settings. 

359 

360 This is None if Redis is not enabled. 

361 """ 

362 return cast("Redis[str]", self.settings.get("REDIS")) 

363 

364 @property 

365 def redis_prefix(self) -> str: 

366 """Get the Redis prefix from the settings.""" 

367 return self.settings.get( # type: ignore[no-any-return] 

368 "REDIS_PREFIX", NAME 

369 ) 

370 

371 

class BaseRequestHandler(_RequestHandler):
    """The base request handler used by every page and API."""

    # pylint: disable=too-many-instance-attributes, too-many-public-methods

    # URL of the Elastic APM RUM script; the unminified
    # version is used when Python runs in dev mode
    ELASTIC_RUM_URL: ClassVar[str] = (
        f"/@apm-rum/elastic-apm-rum.umd{'' if sys.flags.dev_mode else '.min'}.js"
        "?v=5.12.0"
    )

    # compute_etag() returns None if this is False
    COMPUTE_ETAG: ClassVar[bool] = True
    # prepare() disables the gzip transforms if this is False
    ALLOW_COMPRESSION: ClassVar[bool] = True
    # prepare() rejects larger request bodies with a 413 error
    MAX_BODY_SIZE: ClassVar[None | int] = None
    # used by get_allowed_methods() in addition to OPTIONS
    ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
    # passed to handle_accept_header() in prepare()
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()

    # set in initialize()
    module_info: ModuleInfo
    # info about page, can be overridden in module_info
    title: str = "Das Asoziale Netzwerk"
    short_title: str = "Asoziales Netzwerk"
    description: str = "Die tolle Webseite des Asozialen Netzwerks"

    # set to True by render(); finish() only converts
    # the output to other content types if this is set
    used_render: bool = False

    # names of the features enabled with origin_trial()
    active_origin_trials: set[str]
    # set by handle_accept_header()
    content_type: None | str = None
    # NOTE(review): not assigned in the visible part of the file
    apm_script: None | str
    # set by set_csp_header()
    nonce: str

400 

401 def _finish( 

402 self, chunk: None | str | bytes | dict[str, Any] = None 

403 ) -> Future[None]: 

404 if self._finished: 

405 raise RuntimeError("finish() called twice") 

406 

407 if chunk is not None: 

408 self.write(chunk) 

409 

410 if ( # pylint: disable=too-many-boolean-expressions 

411 (content_type := self.content_type) 

412 and ( 

413 content_type in TEXT_CONTENT_TYPES 

414 or content_type.startswith("text/") 

415 or content_type.endswith(("+xml", "+json")) 

416 ) 

417 and self._write_buffer 

418 and not self._write_buffer[-1].endswith(b"\n") 

419 ): 

420 self.write(b"\n") 

421 

422 return super().finish() 

423 

424 @override 

425 def compute_etag(self) -> None | str: 

426 """Compute ETag with Base85 encoding.""" 

427 if not self.COMPUTE_ETAG: 

428 return None 

429 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907 

430 

431 @override 

432 def decode_argument( # noqa: D102 

433 self, value: bytes, name: str | None = None 

434 ) -> str: 

435 try: 

436 return value.decode("UTF-8", "replace") 

437 except UnicodeDecodeError as exc: 

438 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}" 

439 LOGGER.exception(err_msg, exc_info=exc) 

440 raise HTTPError(400, err_msg) from exc 

441 

442 @property 

443 def dump(self) -> Callable[[Any], str | bytes]: 

444 """Get the function for dumping the output.""" 

445 yaml_subset = self.content_type in { 

446 "application/json", 

447 "application/vnd.asozial.dynload+json", 

448 } 

449 

450 if self.content_type == "application/yaml": 

451 if self.now.timetuple()[2:0:-1] == (1, 4): 

452 yaml_subset = True 

453 else: 

454 return lambda spam: yaml.dump( 

455 spam, 

456 width=self.get_int_argument("yaml_width", 80, min_=80), 

457 ) 

458 

459 if yaml_subset: 

460 option = ORJSON_OPTIONS 

461 if self.get_bool_argument("pretty", False): 

462 option |= json.OPT_INDENT_2 

463 return lambda spam: json.dumps(spam, option=option) 

464 

465 return lambda spam: spam 

466 

467 @override 

468 def finish( # noqa: D102 

469 self, chunk: None | str | bytes | dict[Any, Any] = None 

470 ) -> Future[None]: 

471 as_json = self.content_type == "application/vnd.asozial.dynload+json" 

472 as_plain_text = self.content_type == "text/plain" 

473 as_markdown = self.content_type == "text/markdown" 

474 

475 if ( 

476 not isinstance(chunk, bytes | str) 

477 or self.content_type == "text/html" 

478 or not self.used_render 

479 or not (as_json or as_plain_text or as_markdown) 

480 ): 

481 return self._finish(chunk) 

482 

483 chunk = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk 

484 

485 if as_markdown: 

486 return self._finish( 

487 f"# {self.title}\n\n" 

488 + html2text.html2text(chunk, self.request.full_url()).strip() 

489 ) 

490 

491 soup = BeautifulSoup(chunk, features="lxml") 

492 

493 if as_plain_text: 

494 return self._finish(soup.get_text("\n", True)) 

495 

496 dictionary: dict[str, object] = { 

497 "url": self.fix_url(include_protocol_and_host=True), 

498 "title": self.title, 

499 "short_title": ( 

500 self.short_title if self.title != self.short_title else None 

501 ), 

502 "body": "".join( 

503 str(element) 

504 for element in soup.find_all(name="main")[0].contents 

505 ).strip(), 

506 "scripts": [ 

507 {"script": script.string} | script.attrs 

508 for script in soup.find_all("script") 

509 ], 

510 "stylesheets": [ 

511 stylesheet.get("href").strip() 

512 for stylesheet in soup.find_all("link", rel="stylesheet") 

513 ], 

514 "css": "\n".join(style.string for style in soup.find_all("style")), 

515 } 

516 

517 return self._finish(dictionary) 

518 

519 finish.__doc__ = _RequestHandler.finish.__doc__ 

520 

521 def finish_dict(self, **kwargs: Any) -> Future[None]: 

522 """Finish the request with a dictionary.""" 

523 return self.finish(kwargs) 

524 

    def fix_url(
        self,
        url: None | str | SplitResult = None,
        new_path: None | str = None,
        include_protocol_and_host: bool | str = False,
        query_args: Mapping[str, None | str | bool | float] | None = None,
    ) -> str:
        """
        Fix a URL and return it.

        If the URL is from another website, link to it with the redirect page
        (or return it unchanged if the user does not want to be asked before
        leaving or the redirect module is not loaded), otherwise return the
        URL with the user settings added as query arguments.

        The URL of the current request is used if `url` is None.
        `new_path` replaces the path of URLs of this website.
        Protocol and host are stripped from the result unless
        `include_protocol_and_host` is truthy.
        `query_args` take precedence over the user settings.
        """
        # work on a copy, the mapping of the caller must not be modified
        query_args_d = dict(query_args or {})
        del query_args
        if url is None:
            url = self.request.full_url()
        if isinstance(url, str):
            url = urlsplit(url)
        if url.netloc and url.netloc.lower() != self.request.host.lower():
            # URL of another website
            if (
                not self.user_settings.ask_before_leaving
                or not self.settings.get("REDIRECT_MODULE_LOADED")
            ):
                return url.geturl()
            path = "/redirect"
            query_args_d["to"] = url.geturl()
            # query and fragment now come from the current request
            url = urlsplit(self.request.full_url())
        else:
            path = url.path if new_path is None else new_path
        # normalize the path: single leading slash, no trailing slash, lowercase
        path = f"/{path.strip('/')}".lower()
        if path == "/lolwut":
            path = path.upper()
        if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
            # set every option to None for these paths
            # NOTE(review): presumably None makes add_args_to_url
            # drop the argument -- confirm
            query_args_d.update(
                dict.fromkeys(self.user_settings.iter_option_names())
            )
        else:
            # NOTE(review): the original indentation is not visible; both
            # loops are assumed to belong to this else branch
            for (
                key,
                value,
            ) in self.user_settings.as_dict_with_str_values().items():
                query_args_d.setdefault(key, value)
            # set arguments to None if they equal the value the
            # settings have without the query arguments
            for key, value in self.user_settings.as_dict_with_str_values(
                include_query_argument=False,
                include_body_argument=self.request.path == "/einstellungen"
                and self.get_bool_argument("save_in_cookie", False),
            ).items():
                if value == query_args_d[key]:
                    query_args_d[key] = None

        result = add_args_to_url(
            urlunsplit(
                (
                    self.request.protocol,
                    self.request.host,
                    path,
                    url.query,
                    url.fragment,
                )
            ),
            **query_args_d,
        )

        return (
            result
            if include_protocol_and_host
            else result.removeprefix(
                f"{self.request.protocol}://{self.request.host}"
            )
        )

596 

597 @classmethod 

598 def get_allowed_methods(cls) -> list[str]: 

599 """Get allowed methods.""" 

600 methods = {"OPTIONS", *cls.ALLOWED_METHODS} 

601 if "GET" in cls.ALLOWED_METHODS and cls.supports_head(): 

602 methods.add("HEAD") 

603 return sorted(methods) 

604 

605 def get_bool_argument( 

606 self, 

607 name: str, 

608 default: None | bool = None, 

609 ) -> bool: 

610 """Get an argument parsed as boolean.""" 

611 if default is not None: 

612 return str_to_bool(self.get_argument(name, ""), default) 

613 value = str(self.get_argument(name)) 

614 try: 

615 return str_to_bool(value) 

616 except ValueError as err: 

617 raise HTTPError(400, f"{value} is not a boolean") from err 

618 

619 def get_display_scheme(self) -> ColourScheme: 

620 """Get the scheme currently displayed.""" 

621 scheme = self.user_settings.scheme 

622 if scheme == "random": 

623 return ("light", "dark")[self.now.microsecond & 1] 

624 return scheme 

625 

626 def get_display_theme(self) -> str: 

627 """Get the theme currently displayed.""" 

628 theme = self.user_settings.theme 

629 

630 if theme == "default" and self.now.month == 12: 

631 return "christmas" 

632 

633 if theme != "random": 

634 return theme 

635 

636 ignore_themes = ("random", "christmas") 

637 

638 return random_choice( # nosec: B311 

639 tuple(theme for theme in THEMES if theme not in ignore_themes) 

640 ) 

641 

642 def get_error_message(self, **kwargs: Any) -> str: 

643 """ 

644 Get the error message and return it. 

645 

646 If the serve_traceback setting is true (debug mode is activated), 

647 the traceback gets returned. 

648 """ 

649 if "exc_info" in kwargs and not issubclass( 

650 kwargs["exc_info"][0], HTTPError 

651 ): 

652 if self.settings.get("serve_traceback") or self.is_authorized( 

653 Permission.TRACEBACK 

654 ): 

655 return "".join( 

656 traceback.format_exception(*kwargs["exc_info"]) 

657 ).strip() 

658 return "".join( 

659 traceback.format_exception_only(*kwargs["exc_info"][:2]) 

660 ).strip() 

661 if "exc_info" in kwargs and issubclass( 

662 kwargs["exc_info"][0], MissingArgumentError 

663 ): 

664 return cast(str, kwargs["exc_info"][1].log_message) 

665 return str(self._reason) 

666 

667 def get_error_page_description(self, status_code: int) -> str: 

668 """Get the description for the error page.""" 

669 # pylint: disable=too-many-return-statements 

670 # https://developer.mozilla.org/docs/Web/HTTP/Status 

671 if 100 <= status_code <= 199: 

672 return "Hier gibt es eine total wichtige Information." 

673 if 200 <= status_code <= 299: 

674 return "Hier ist alles super! 🎶🎶" 

675 if 300 <= status_code <= 399: 

676 return "Eine Umleitung ist eingerichtet." 

677 if 400 <= status_code <= 499: 

678 if status_code == 404: 

679 return f"{self.request.path} wurde nicht gefunden." 

680 if status_code == 451: 

681 return "Hier wäre bestimmt geiler Scheiß." 

682 return "Ein Client-Fehler ist aufgetreten." 

683 if 500 <= status_code <= 599: 

684 return "Ein Server-Fehler ist aufgetreten." 

685 raise ValueError( 

686 f"{status_code} is not a valid HTTP response status code." 

687 ) 

688 

689 def get_int_argument( 

690 self, 

691 name: str, 

692 default: None | int = None, 

693 *, 

694 max_: None | int = None, 

695 min_: None | int = None, 

696 ) -> int: 

697 """Get an argument parsed as integer.""" 

698 if default is None: 

699 str_value = self.get_argument(name) 

700 try: 

701 value = int(str_value, base=0) 

702 except ValueError as err: 

703 raise HTTPError(400, f"{str_value} is not an integer") from err 

704 elif self.get_argument(name, ""): 

705 try: 

706 value = int(self.get_argument(name), base=0) 

707 except ValueError: 

708 value = default 

709 else: 

710 value = default 

711 

712 if max_ is not None: 

713 value = min(max_, value) 

714 if min_ is not None: 

715 value = max(min_, value) 

716 

717 return value 

718 

719 def get_module_infos(self) -> tuple[ModuleInfo, ...]: 

720 """Get the module infos.""" 

721 return self.settings.get("MODULE_INFOS") or () 

722 

723 def get_reporting_api_endpoint(self) -> None | str: 

724 """Get the endpoint for the Reporting API™️.""" 

725 if not self.settings.get("REPORTING"): 

726 return None 

727 endpoint = self.settings.get("REPORTING_ENDPOINT") 

728 

729 if not endpoint or not endpoint.startswith("/"): 

730 return endpoint 

731 

732 return f"{self.request.protocol}://{self.request.host}{endpoint}" 

733 

734 @override 

735 def get_template_namespace(self) -> dict[str, Any]: 

736 """ 

737 Add useful things to the template namespace and return it. 

738 

739 They are mostly needed by most of the pages (like title, 

740 description and no_3rd_party). 

741 """ 

742 namespace = super().get_template_namespace() 

743 ansi2html = partial( 

744 Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False 

745 ) 

746 namespace.update(self.user_settings.as_dict()) 

747 namespace.update( 

748 ansi2html=partial( 

749 reduce, apply, (ansi2html, ansi_replace, backspace_replace) 

750 ), 

751 apm_script=( 

752 self.settings["ELASTIC_APM"].get("INLINE_SCRIPT") 

753 if self.apm_enabled 

754 else None 

755 ), 

756 as_html=self.content_type == "text/html", 

757 c=self.now.date() == date(self.now.year, 4, 1) 

758 or str_to_bool(self.get_cookie("c", "f") or "f", False), 

759 canonical_url=self.request.protocol 

760 + "://" 

761 + (self.settings["DOMAIN"] or self.request.host) 

762 + self.fix_url( 

763 self.request.full_url().upper() 

764 if self.request.path.upper().startswith("/LOLWUT") 

765 else self.request.full_url().lower() 

766 ) 

767 .split("?")[0] 

768 .removesuffix("/"), 

769 description=self.description, 

770 display_theme=self.get_display_theme(), 

771 display_scheme=self.get_display_scheme(), 

772 elastic_rum_url=self.ELASTIC_RUM_URL, 

773 fix_static=lambda path: self.fix_url(fix_static_path(path)), 

774 fix_url=self.fix_url, 

775 emoji2html=( 

776 emoji2html 

777 if self.user_settings.openmoji == "img" 

778 else ( 

779 (lambda emoji: f'<span class="openmoji">{emoji}</span>') 

780 if self.user_settings.openmoji 

781 else (lambda emoji: f"<span>{emoji}</span>") 

782 ) 

783 ), 

784 form_appendix=self.user_settings.get_form_appendix(), 

785 GH_ORG_URL=GH_ORG_URL, 

786 GH_PAGES_URL=GH_PAGES_URL, 

787 GH_REPO_URL=GH_REPO_URL, 

788 keywords="Asoziales Netzwerk, Känguru-Chroniken" 

789 + ( 

790 f", {self.module_info.get_keywords_as_str(self.request.path)}" 

791 if self.module_info # type: ignore[truthy-bool] 

792 else "" 

793 ), 

794 lang="de", # TODO: add language support 

795 nonce=self.nonce, 

796 now=self.now, 

797 openmoji_version=OPENMOJI_VERSION, 

798 settings=self.settings, 

799 short_title=self.short_title, 

800 testing=pytest_is_running(), 

801 title=self.title, 

802 ) 

803 namespace.update( 

804 { 

805 "🥚": timedelta() 

806 <= self.now.date() - easter(self.now.year) 

807 < timedelta(days=2), 

808 "🦘": is_prime(self.now.microsecond), 

809 } 

810 ) 

811 return namespace 

812 

813 def get_user_id(self) -> str: 

814 """Get the user id saved in the cookie or create one.""" 

815 cookie = self.get_secure_cookie( 

816 "user_id", 

817 max_age_days=90, 

818 min_version=2, 

819 ) 

820 

821 user_id = cookie.decode("UTF-8") if cookie else str(uuid.uuid4()) 

822 

823 if not self.get_secure_cookie( # save it in cookie or reset expiry date 

824 "user_id", max_age_days=30, min_version=2 

825 ): 

826 self.set_secure_cookie( 

827 "user_id", 

828 user_id, 

829 expires_days=90, 

830 path="/", 

831 samesite="Strict", 

832 ) 

833 

834 return user_id 

835 

836 def handle_accept_header( # pylint: disable=inconsistent-return-statements 

837 self, possible_content_types: tuple[str, ...], strict: bool = True 

838 ) -> None: 

839 """Handle the Accept header and set `self.content_type`.""" 

840 if not possible_content_types: 

841 return 

842 content_type = get_best_match( 

843 self.request.headers.get("Accept") or "*/*", 

844 possible_content_types, 

845 ) 

846 if content_type is None: 

847 if strict: 

848 return self.handle_not_acceptable(possible_content_types) 

849 content_type = possible_content_types[0] 

850 self.content_type = content_type 

851 self.set_content_type_header() 

852 

853 def handle_not_acceptable( 

854 self, possible_content_types: tuple[str, ...] 

855 ) -> None: 

856 """Only call this if we cannot respect the Accept header.""" 

857 self.clear_header("Content-Type") 

858 self.set_status(406) 

859 raise Finish("\n".join(possible_content_types) + "\n") 

860 

861 def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]: 

862 """Handle HEAD requests.""" 

863 if self.get.__module__ == "tornado.web": 

864 raise HTTPError(405) 

865 if not self.supports_head(): 

866 raise HTTPError(501) 

867 

868 kwargs["head"] = True 

869 return self.get(*args, **kwargs) 

870 

871 @override 

872 def initialize( 

873 self, 

874 *, 

875 module_info: ModuleInfo, 

876 # default is true, because then empty args dicts are 

877 # enough to specify that the defaults should be used 

878 default_title: bool = True, 

879 default_description: bool = True, 

880 ) -> None: 

881 """ 

882 Get title and description from the kwargs. 

883 

884 If title and description are present in the kwargs, 

885 then they override self.title and self.description. 

886 """ 

887 self.module_info = module_info 

888 if not default_title: 

889 page_info = self.module_info.get_page_info(self.request.path) 

890 self.title = page_info.name 

891 self.short_title = page_info.short_name or self.title 

892 if not default_description: 

893 self.description = self.module_info.get_page_info( 

894 self.request.path 

895 ).description 

896 

897 @override 

898 async def options(self, *args: Any, **kwargs: Any) -> None: 

899 """Handle OPTIONS requests.""" 

900 # pylint: disable=unused-argument 

901 self.set_header("Allow", ", ".join(self.get_allowed_methods())) 

902 self.set_status(204) 

903 await self.finish() 

904 

905 def origin_trial(self, token: bytes | str) -> bool: 

906 """Enable an experimental feature.""" 

907 # pylint: disable=protected-access 

908 payload = json.loads(b64decode(token)[69:]) 

909 if payload["feature"] in self.active_origin_trials: 

910 return True 

911 origin = urlsplit(payload["origin"]) 

912 url = urlsplit(self.request.full_url()) 

913 if url.port is None and url.scheme in {"http", "https"}: 

914 url = url._replace( 

915 netloc=f"{url.hostname}:{443 if url.scheme == 'https' else 80}" 

916 ) 

917 if self.request._start_time > payload["expiry"]: 

918 return False 

919 if url.scheme != origin.scheme: 

920 return False 

921 if url.netloc != origin.netloc and not ( 

922 payload.get("isSubdomain") 

923 and url.netloc.endswith(f".{origin.netloc}") 

924 ): 

925 return False 

926 self.add_header("Origin-Trial", token) 

927 self.active_origin_trials.add(payload["feature"]) 

928 return True 

929 

930 @override 

931 async def prepare(self) -> None: 

932 """Check authorization and call self.ratelimit().""" 

933 await super().prepare() 

934 

935 if self._finished: 

936 return 

937 

938 if not self.ALLOW_COMPRESSION: 

939 for transform in self._transforms: 

940 if isinstance(transform, GZipContentEncoding): 

941 # pylint: disable=protected-access 

942 transform._gzipping = False 

943 

944 self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES) 

945 

946 if self.request.method == "GET" and ( 

947 days := Random(self.now.timestamp()).randint(0, 31337) 

948 ) in { 

949 69, 

950 420, 

951 1337, 

952 31337, 

953 }: 

954 self.set_cookie("c", "s", expires_days=days / 24, path="/") 

955 

956 if ( 

957 self.request.method != "OPTIONS" 

958 and self.MAX_BODY_SIZE is not None 

959 and len(self.request.body) > self.MAX_BODY_SIZE 

960 ): 

961 LOGGER.warning( 

962 "%s > MAX_BODY_SIZE (%s)", 

963 len(self.request.body), 

964 self.MAX_BODY_SIZE, 

965 ) 

966 raise HTTPError(413) 

967 

968 @override 

969 def render( # noqa: D102 

970 self, template_name: str, **kwargs: Any 

971 ) -> Future[None]: 

972 self.used_render = True 

973 return super().render(template_name, **kwargs) 

974 

975 render.__doc__ = _RequestHandler.render.__doc__ 

976 

977 def set_content_type_header(self) -> None: 

978 """Set the Content-Type header based on `self.content_type`.""" 

979 if str(self.content_type).startswith("text/"): # RFC 2616 (3.7.1) 

980 self.set_header( 

981 "Content-Type", f"{self.content_type};charset=utf-8" 

982 ) 

983 elif self.content_type is not None: 

984 self.set_header("Content-Type", self.content_type) 

985 

@override
def set_cookie(  # noqa: D102  # pylint: disable=too-many-arguments
    self,
    name: str,
    value: str | bytes,
    domain: None | str = None,
    expires: None | float | tuple[int, ...] | datetime = None,
    path: str = "/",
    expires_days: None | float = 400,  # changed
    *,
    secure: bool | None = None,
    httponly: bool = True,
    **kwargs: Any,
) -> None:
    # default for same site should be strict
    kwargs.setdefault("samesite", "Strict")

    if secure is None:
        # no explicit choice: secure cookies exactly for HTTPS requests
        secure = self.request.protocol == "https"

    super().set_cookie(
        name,
        value,
        domain,
        expires,
        path,
        expires_days,
        secure=secure,
        httponly=httponly,
        **kwargs,
    )

# reuse the docstring of the overridden method
set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__

1019 

def set_csp_header(self) -> None:
    """
    Set the Content-Security-Policy header.

    A new random nonce is stored in `self.nonce` and allowed in
    script-src. If APM is enabled, the configured inline script hash
    and the (RUM) server URL get added to script-src / connect-src.
    A report-uri directive is appended if REPORTING is enabled.
    """
    self.nonce = secrets.token_urlsafe(16)

    # the APM settings are only looked at if APM is enabled
    apm_config = self.settings["ELASTIC_APM"] if self.apm_enabled else {}

    script_sources = ["'self'", f"'nonce-{self.nonce}'"]
    if "INLINE_SCRIPT_HASH" in apm_config:
        script_sources += [
            f"'sha256-{apm_config['INLINE_SCRIPT_HASH']}'",
            "'unsafe-inline'",  # for browsers that don't support hash
        ]

    connect_sources = ["'self'"]
    if "SERVER_URL" in apm_config:
        rum_server_url = apm_config.get("RUM_SERVER_URL")
        if rum_server_url is None:
            # the RUM agent needs to connect to ["ELASTIC_APM"]["SERVER_URL"]
            connect_sources.append(apm_config["SERVER_URL"])
        elif rum_server_url:
            # the RUM agent needs to connect to rum_server_url
            connect_sources.append(rum_server_url)
        # a falsy value that is not None adds nothing

    websocket_scheme = "wss" if self.request.protocol == "https" else "ws"
    connect_sources.append(  # fix for older browsers
        websocket_scheme + f"://{self.request.host}"
    )

    policy_parts = [
        "default-src 'self';",
        f"script-src {' '.join(script_sources)};",
        f"connect-src {' '.join(connect_sources)};",
        "style-src 'self' 'unsafe-inline';",
        "img-src 'self' https://img.zeit.de https://github.asozial.org;",
        "frame-ancestors 'self';",
        "sandbox allow-downloads allow-same-origin allow-modals"
        " allow-popups-to-escape-sandbox allow-scripts allow-popups"
        " allow-top-navigation-by-user-activation allow-forms;",
        "report-to default;",
        "base-uri 'none';",
    ]
    if self.settings.get("REPORTING"):
        policy_parts.append(
            f"report-uri {self.get_reporting_api_endpoint()};"
        )

    self.set_header("Content-Security-Policy", "".join(policy_parts))

1072 

@override
def set_default_headers(self) -> None:
    """
    Set the headers every response starts out with.

    This sets the CSP header, resets `self.active_origin_trials` and
    then sets reporting, CORS, cross-origin isolation, permissions,
    referrer, HSTS, Onion-Location, debug and miscellaneous headers.
    Which of the optional ones get sent depends on `self.settings`.
    """
    self.set_csp_header()
    self.active_origin_trials = set()

    if self.settings.get("REPORTING"):
        endpoint = self.get_reporting_api_endpoint()
        self.set_header(
            "Reporting-Endpoints",
            f'default="{endpoint}"',  # noqa: B907
        )
        # `json` is orjson in this module
        report_to = json.dumps(
            {
                "group": "default",
                "max_age": 2592000,
                "endpoints": [{"url": endpoint}],
            },
            option=ORJSON_OPTIONS,
        )
        self.set_header("Report-To", report_to)
        self.set_header("NEL", '{"report_to":"default","max_age":2592000}')

    for header_name, header_value in (
        ("X-Content-Type-Options", "nosniff"),
        ("Access-Control-Max-Age", "7200"),
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Headers", "*"),
    ):
        self.set_header(header_name, header_value)

    allowed_methods = ", ".join(self.get_allowed_methods())
    self.set_header("Access-Control-Allow-Methods", allowed_methods)

    for header_name, header_value in (
        ("Cross-Origin-Resource-Policy", "cross-origin"),
        (
            "Permissions-Policy",
            "browsing-topics=(),"
            "identity-credentials-get=(),"
            "join-ad-interest-group=(),"
            "private-state-token-issuance=(),"
            "private-state-token-redemption=(),"
            "run-ad-auction=()",
        ),
        ("Referrer-Policy", "same-origin"),
        ("Cross-Origin-Opener-Policy", "same-origin;report-to=default"),
    ):
        self.set_header(header_name, header_value)

    # TODO: improve this
    embedder_policy = (
        "credentialless;report-to=default"
        if self.request.path == "/kaenguru-comics-alt"
        else "require-corp;report-to=default"
    )
    self.set_header("Cross-Origin-Embedder-Policy", embedder_policy)

    if self.settings.get("HSTS"):
        self.set_header("Strict-Transport-Security", "max-age=63072000")

    onion_address = self.settings.get("ONION_ADDRESS")
    if onion_address and not self.request.host_name.endswith(".onion"):
        # point to the same path (and query) on the onion service
        onion_location = onion_address + self.request.path
        if self.request.query:
            onion_location += f"?{self.request.query}"
        self.set_header("Onion-Location", onion_location)

    if self.settings.get("debug"):
        self.set_header("X-Debug", bool_to_str(True))
        for permission in Permission:
            if not permission.name:
                continue
            self.set_header(
                f"X-Permission-{permission.name}",
                bool_to_str(bool(self.is_authorized(permission))),
            )

    # the entry is selected by the microsecond of the current UTC time
    clacks_index = int(self.now_utc.microsecond) % len(CLACKS_OVERHEADS)
    self.set_header("X-Clacks-Overhead", CLACKS_OVERHEADS[clacks_index])

    self.set_header("Vary", "Accept,Authorization,Cookie")

# reuse the docstring of the overridden method
set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__

1156 

@classmethod
def supports_head(cls) -> bool:
    """
    Check whether this request handler supports HEAD requests.

    That is the case if the `get` method of the class
    has a keyword-only parameter named "head".
    """
    head_parameter = inspect.signature(cls.get).parameters.get("head")
    if head_parameter is None:
        return False
    return head_parameter.kind == inspect.Parameter.KEYWORD_ONLY

1166 

@cached_property
def user_settings(self) -> Options:
    """Get the user settings (an `Options` object created from this handler)."""
    options = Options(self)
    return options

1171 

@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:  # noqa: D102
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    self.set_content_type_header()

    if isinstance(chunk, dict):
        # dicts get serialized by self.dump()
        chunk = self.dump(chunk)

    if self.now.date() == date(self.now.year, 4, 27):
        # on April 27th some of the words get replaced

        def replace_word(match: regex.Match[str]) -> str:
            """Return "Stanley" or the matched word unchanged."""
            word = match[0]
            if Random(word).randrange(5) == self.now.year % 5:
                return "Stanley"
            return word

        if isinstance(chunk, bytes):
            # bytes that aren't valid UTF-8 stay bytes and are left alone
            with contextlib.suppress(UnicodeDecodeError):
                chunk = chunk.decode("UTF-8")

        if isinstance(chunk, str):
            # matches an uppercase letter followed by 4+ lowercase letters
            chunk = regex.sub(
                r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b",
                replace_word,
                chunk,
            )

    super().write(chunk)

# reuse the docstring of the overridden method
write.__doc__ = _RequestHandler.write.__doc__

1200 

@override
def write_error(self, status_code: int, **kwargs: Any) -> None:
    """
    Send the error response in a suitable content type.

    text/html renders the "error.html" template,
    JSON and YAML get a dict with "status" and "reason",
    everything else gets a single line of text.
    """
    dict_content_types: tuple[str, str] = (
        "application/json",
        "application/yaml",
    )
    all_error_content_types: tuple[str, ...] = (
        # text/plain as first (default), to not screw up output in terminals
        "text/plain",
        "text/html",
        "text/markdown",
        *dict_content_types,
        "application/vnd.asozial.dynload+json",
    )

    if self.content_type not in all_error_content_types:
        # don't send 406, instead default with text/plain
        self.handle_accept_header(all_error_content_types, strict=False)

    # every kind of error response needs the message
    reason = self.get_error_message(**kwargs)

    if self.content_type == "text/html":
        description = self.get_error_page_description(status_code)
        # only for exceptions that aren't HTTP errors and only
        # if serving tracebacks is enabled or the client is authorized
        show_traceback = (
            "exc_info" in kwargs
            and not issubclass(kwargs["exc_info"][0], HTTPError)
            and (
                self.settings.get("serve_traceback")
                or self.is_authorized(Permission.TRACEBACK)
            )
        )
        self.render(  # type: ignore[unused-awaitable]
            "error.html",
            status=status_code,
            reason=reason,
            description=description,
            is_traceback=show_traceback,
        )
    elif self.content_type in dict_content_types:
        self.finish(  # type: ignore[unused-awaitable]
            {
                "status": status_code,
                "reason": reason,
            }
        )
    else:
        self.finish(  # type: ignore[unused-awaitable]
            f"{status_code} {reason}\n"
        )

# reuse the docstring of the overridden method
write_error.__doc__ = _RequestHandler.write_error.__doc__