Coverage for an_website / utils / base_request_handler.py: 78.905%

493 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-24 17:35 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13# pylint: disable=too-many-lines 

14 

15""" 

16The base request handler used by other modules. 

17 

18This should only contain the BaseRequestHandler class. 

19""" 

20 

21import contextlib 

22import inspect 

23import logging 

24import secrets 

25import sys 

26import traceback 

27import uuid 

28from asyncio import Future 

29from base64 import b64decode 

30from collections.abc import Awaitable, Callable, Coroutine, Mapping 

31from contextvars import ContextVar 

32from datetime import date, datetime, timedelta, timezone, tzinfo 

33from functools import cached_property, partial, reduce 

34from random import Random, choice as random_choice 

35from types import TracebackType 

36from typing import Any, ClassVar, Final, cast, override 

37from urllib.parse import SplitResult, urlsplit, urlunsplit 

38from zoneinfo import ZoneInfo 

39 

40import elasticapm 

41import html2text 

42import orjson as json 

43import regex 

44import tornado.web 

45import yaml 

46from accept_types import get_best_match # type: ignore[import-untyped] 

47from ansi2html import Ansi2HTMLConverter 

48from bs4 import BeautifulSoup 

49from dateutil.easter import easter 

50from elastic_transport import ApiError, TransportError 

51from elasticsearch import AsyncElasticsearch 

52from openmoji_dist import VERSION as OPENMOJI_VERSION 

53from redis.asyncio import Redis 

54from tornado.httputil import HTTPServerRequest 

55from tornado.iostream import StreamClosedError 

56from tornado.web import ( 

57 Finish, 

58 GZipContentEncoding, 

59 HTTPError, 

60 MissingArgumentError, 

61 OutputTransform, 

62) 

63 

64from .. import ( 

65 EVENT_ELASTICSEARCH, 

66 EVENT_REDIS, 

67 GH_ORG_URL, 

68 GH_PAGES_URL, 

69 GH_REPO_URL, 

70 NAME, 

71 ORJSON_OPTIONS, 

72 pytest_is_running, 

73) 

74from .decorators import is_authorized 

75from .options import ColourScheme, Options 

76from .static_file_handling import FILE_HASHES_DICT, fix_static_path 

77from .themes import THEMES 

78from .utils import ( 

79 ModuleInfo, 

80 Permission, 

81 add_args_to_url, 

82 ansi_replace, 

83 apply, 

84 backspace_replace, 

85 bool_to_str, 

86 emoji2html, 

87 geoip, 

88 hash_bytes, 

89 is_prime, 

90 ratelimit, 

91 str_to_bool, 

92) 

93 

# Logger shared by everything in this module.
LOGGER: Final = logging.getLogger(__name__)

# Content types outside of "text/*" whose bodies are still textual;
# `BaseRequestHandler._finish` terminates such bodies with a newline.
TEXT_CONTENT_TYPES: Final[set[str]] = {
    f"application/{subtype}"
    for subtype in (
        "javascript",
        "json",
        "vnd.asozial.dynload+json",
        "x-ndjson",
        "xml",
        "yaml",
    )
}

# NOTE(review): presumably values for an "X-Clacks-Overhead" response
# header; the place where this tuple is used is not visible here.
CLACKS_OVERHEADS = tuple(
    f"GNU {name}"
    for name in (
        "Aaron Swartz",
        "Carol Angie Deborah Maltesi",
        "Charlotte Angie",
        "Terry Pratchett",
    )
)

111 

# The request that is currently being handled; it gets set at the very
# start of `_RequestHandler._execute`.
request_ctx_var: ContextVar[HTTPServerRequest] = ContextVar(
    "current_request"
)

113 

114 

class _RequestHandler(tornado.web.RequestHandler):
    """
    Base for Tornado request handlers.

    Adds the request start time, GeoIP lookups, ratelimiting, the
    redirect to the canonical domain and accessors for the clients
    that are stored in the application settings.
    """

    # set in prepare() if the configured crawler secret is in the User-Agent
    crawler: bool = False

    @override
    async def _execute(
        self, transforms: list[OutputTransform], *args: bytes, **kwargs: bytes
    ) -> None:
        # expose the request to code that has no reference to the handler
        request_ctx_var.set(self.request)

        # `now` is a cached_property, so this assignment is stored on the
        # instance and the fallback implementation below is never run
        self.now = await self.get_time()

        return await super()._execute(transforms, *args, **kwargs)

    # pylint: disable-next=protected-access
    _execute.__doc__ = tornado.web.RequestHandler._execute.__doc__

    @property
    def apm_client(self) -> None | elasticapm.Client:
        """Get the APM client from the settings (None if there is none)."""
        return self.settings.get("ELASTIC_APM", {}).get("CLIENT")  # type: ignore[no-any-return]

    @property
    def apm_enabled(self) -> bool:
        """Return whether APM is enabled."""
        return bool(self.settings.get("ELASTIC_APM", {}).get("ENABLED"))

    @override
    def data_received(  # noqa: D102
        self, chunk: bytes
    ) -> None | Awaitable[None]:
        pass

    data_received.__doc__ = tornado.web.RequestHandler.data_received.__doc__

    @property
    def elasticsearch(self) -> AsyncElasticsearch:
        """
        Get the Elasticsearch client from the settings.

        This is None if Elasticsearch is not enabled.
        """
        return cast(AsyncElasticsearch, self.settings.get("ELASTICSEARCH"))

    @property
    def elasticsearch_prefix(self) -> str:
        """Get the Elasticsearch prefix from the settings."""
        return self.settings.get(  # type: ignore[no-any-return]
            "ELASTICSEARCH_PREFIX", NAME
        )

    def geoip(
        self,
        ip: None | str = None,
        database: str = geoip.__defaults__[0],  # type: ignore[index]
        *,
        allow_fallback: bool = True,
    ) -> Coroutine[None, None, None | dict[str, Any]]:
        """
        Get GeoIP information.

        The IP defaults to the remote IP of the request. Elasticsearch is
        only handed to the lookup while it is marked as available.
        """
        if not ip:
            ip = self.request.remote_ip
        if not EVENT_ELASTICSEARCH.is_set():
            return geoip(ip, database)
        return geoip(
            ip, database, self.elasticsearch, allow_fallback=allow_fallback
        )

    async def get_time(self) -> datetime:
        """
        Get the start time of the request in the users' timezone.

        UTC is used if the GeoIP lookup fails, yields no timezone
        or yields a timezone that cannot be loaded.
        """
        tz: tzinfo = timezone.utc
        try:
            geoip = await self.geoip()  # pylint: disable=redefined-outer-name
        except (ApiError, TransportError):
            LOGGER.exception("Elasticsearch request failed")
            if self.apm_client:
                self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
        else:
            if geoip and "timezone" in geoip:
                try:
                    tz = ZoneInfo(geoip["timezone"])
                except (KeyError, ValueError):
                    # ZoneInfoNotFoundError is a KeyError and malformed keys
                    # raise ValueError; this runs for every request, so a
                    # bad value must not turn all responses into errors
                    LOGGER.warning(
                        "Unknown timezone %r from GeoIP", geoip["timezone"]
                    )
        return datetime.fromtimestamp(
            self.request._start_time, tz=tz  # pylint: disable=protected-access
        )

    def is_authorized(
        self, permission: Permission, allow_cookie_auth: bool = True
    ) -> bool | None:
        """Check whether the request is authorized."""
        return is_authorized(self, permission, allow_cookie_auth)

    @override
    def log_exception(
        self,
        typ: None | type[BaseException],
        value: None | BaseException,
        tb: None | TracebackType,
    ) -> None:
        if isinstance(value, HTTPError):
            # HTTP errors keep Tornado's default logging
            super().log_exception(typ, value, tb)
        elif typ is StreamClosedError:
            # a closed stream is only worth a debug message
            LOGGER.debug(
                "Stream closed %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )
        else:
            LOGGER.error(
                "Uncaught exception %s",
                self._request_summary(),
                exc_info=(typ, value, tb),  # type: ignore[arg-type]
            )

    log_exception.__doc__ = tornado.web.RequestHandler.log_exception.__doc__

    @cached_property
    def now(self) -> datetime:
        """
        Get the start time of the request.

        `_execute` normally assigns this attribute; this fallback only
        runs if it is read earlier. It raises while pytest is running,
        otherwise it logs an error and returns the start time in UTC.
        """
        # pylint: disable=method-hidden
        if pytest_is_running():
            raise AssertionError("Now accessed before it was set")
        LOGGER.error("Now accessed before it was set", stacklevel=3)
        return self.now_utc

    @cached_property
    def now_utc(self) -> datetime:
        """Get the start time of the request in UTC."""
        return datetime.fromtimestamp(
            self.request._start_time,  # pylint: disable=protected-access
            tz=timezone.utc,
        )

    @override  # pylint: disable-next=invalid-overridden-method
    async def prepare(self) -> None:
        """Detect crawlers, redirect to the canonical domain and ratelimit."""
        if crawler_secret := self.settings.get("CRAWLER_SECRET"):
            self.crawler = crawler_secret in self.request.headers.get(
                "User-Agent", ""
            )

        if (
            self.request.method in {"GET", "HEAD"}
            and self.redirect_to_canonical_domain()
        ):
            return

        # the handler specific limit is only checked
        # if the global limit did not already hit
        if self.request.method != "OPTIONS" and not await self.ratelimit(True):
            await self.ratelimit()

    async def ratelimit(self, global_ratelimit: bool = False) -> bool:
        """
        Apply a ratelimit to the request using Redis.

        With global_ratelimit the limit shared by all handlers is used,
        otherwise the RATELIMIT_{method}_* attributes of the handler are.

        Return whether the request got ratelimited. In that case the
        error response (420 on April 20th, otherwise 429) is written.
        Raise HTTPError(503) if ratelimits are enabled without Redis.
        """
        if (
            not self.settings.get("RATELIMITS")
            or self.request.method == "OPTIONS"
            or self.is_authorized(Permission.RATELIMITS)
            or self.crawler
        ):
            return False

        if not EVENT_REDIS.is_set():
            LOGGER.warning(
                (
                    "Ratelimits are enabled, but Redis is not available. "
                    "This can happen shortly after starting the website."
                ),
            )
            raise HTTPError(503)

        if global_ratelimit:
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=None,
                max_burst=99,  # limit = 100
                count_per_period=20,  # 20 requests per second
                period=1,
                tokens=10 if self.settings.get("UNDER_ATTACK") else 1,
            )
        else:
            # HEAD requests share the configuration of GET requests
            method = (
                "GET" if self.request.method == "HEAD" else self.request.method
            )
            if not (limit := getattr(self, f"RATELIMIT_{method}_LIMIT", 0)):
                return False
            ratelimited, headers = await ratelimit(
                self.redis,
                self.redis_prefix,
                str(self.request.remote_ip),
                bucket=getattr(
                    self,
                    f"RATELIMIT_{method}_BUCKET",
                    self.__class__.__name__.lower(),
                ),
                max_burst=limit - 1,
                count_per_period=getattr(  # request count per period
                    self,
                    f"RATELIMIT_{method}_COUNT_PER_PERIOD",
                    30,
                ),
                period=getattr(
                    self, f"RATELIMIT_{method}_PERIOD", 60  # period in seconds
                ),
                tokens=1 if self.request.method != "HEAD" else 0,
            )

        for header, value in headers.items():
            self.set_header(header, value)

        if ratelimited:
            if self.now.date() == date(self.now.year, 4, 20):
                self.set_status(420)
                self.write_error(420)
            else:
                self.set_status(429)
                self.write_error(429)

        return ratelimited

    def redirect_to_canonical_domain(self) -> bool:
        """
        Redirect to the canonical domain.

        Return whether a (permanent) redirect was sent. Nothing happens
        without a configured domain or Host header, when the host already
        is the domain, for .onion and .i2p hosts and for paths that
        consist only of characters in the range U+2800 to U+28FF.
        """
        if (
            not (domain := self.settings.get("DOMAIN"))
            or not self.request.headers.get("Host")
            or self.request.host_name == domain
            or self.request.host_name.endswith((".onion", ".i2p"))
            or regex.fullmatch(r"/[\u2800-\u28FF]+/?", self.request.path)
        ):
            return False
        # keep the port the client used
        port = urlsplit(f"//{self.request.headers['Host']}").port
        self.redirect(
            urlsplit(self.request.full_url())
            ._replace(netloc=f"{domain}:{port}" if port else domain)
            .geturl(),
            permanent=True,
        )
        return True

    @property
    def redis(self) -> Redis[str]:
        """
        Get the Redis client from the settings.

        This is None if Redis is not enabled.
        """
        return cast("Redis[str]", self.settings.get("REDIS"))

    @property
    def redis_prefix(self) -> str:
        """Get the Redis prefix from the settings."""
        return self.settings.get(  # type: ignore[no-any-return]
            "REDIS_PREFIX", NAME
        )

368 

369 

370class BaseRequestHandler(_RequestHandler): 

371 """The base request handler used by every page and API.""" 

372 

373 # pylint: disable=too-many-instance-attributes, too-many-public-methods 

374 

# URL of the Elastic APM RUM agent script; the minified
# build is used unless Python runs in development mode
ELASTIC_RUM_URL: ClassVar[str] = (
    "/@apm-rum/elastic-apm-rum.umd"
    + ("" if sys.flags.dev_mode else ".min")
    + ".js?v=5.12.0"
)

# switches that subclasses can override
COMPUTE_ETAG: ClassVar[bool] = True  # see compute_etag()
ALLOW_COMPRESSION: ClassVar[bool] = True  # see prepare()
MAX_BODY_SIZE: ClassVar[None | int] = None  # None disables the check
ALLOWED_METHODS: ClassVar[tuple[str, ...]] = ("GET",)
POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = ()

module_info: ModuleInfo
# info about page, can be overridden in module_info
title: str = "Das Asoziale Netzwerk"
short_title: str = "Asoziales Netzwerk"
description: str = "Die tolle Webseite des Asozialen Netzwerks"

# becomes true as soon as render() gets called
used_render: bool = False

active_origin_trials: set[str]
# content type of the response, chosen in handle_accept_header()
content_type: None | str = None
apm_script: None | str
# generated in set_csp_header()
nonce: str

398 

def _finish(
    self, chunk: None | str | bytes | dict[str, Any] = None
) -> Future[None]:
    """
    Write the chunk and finish the request.

    Responses with a textual content type get a trailing
    newline if the buffered body does not already end with one.
    """
    if self._finished:
        raise RuntimeError("finish() called twice")

    if chunk is not None:
        self.write(chunk)

    mime = self.content_type
    textual = False
    if mime:
        textual = (
            mime in TEXT_CONTENT_TYPES
            or mime.startswith("text/")
            or mime.endswith(("+xml", "+json"))
        )

    if textual:
        buffered = self._write_buffer
        if buffered and not buffered[-1].endswith(b"\n"):
            self.write(b"\n")

    return super().finish()

421 

422 @override 

423 def compute_etag(self) -> None | str: 

424 """Compute ETag with Base85 encoding.""" 

425 if not self.COMPUTE_ETAG: 

426 return None 

427 return f'"{hash_bytes(*self._write_buffer)}"' # noqa: B907 

428 

429 @override 

430 def decode_argument( # noqa: D102 

431 self, value: bytes, name: str | None = None 

432 ) -> str: 

433 try: 

434 return value.decode("UTF-8", "replace") 

435 except UnicodeDecodeError as exc: 

436 err_msg = f"Invalid unicode in {name or 'url'}: {value[:40]!r}" 

437 LOGGER.exception(err_msg, exc_info=exc) 

438 raise HTTPError(400, err_msg) from exc 

439 

440 @property 

441 def dump(self) -> Callable[[Any], str | bytes]: 

442 """Get the function for dumping the output.""" 

443 yaml_subset = self.content_type in { 

444 "application/json", 

445 "application/vnd.asozial.dynload+json", 

446 } 

447 

448 if self.content_type == "application/yaml": 

449 if self.now.timetuple()[2:0:-1] == (1, 4): 

450 yaml_subset = True 

451 else: 

452 return lambda spam: yaml.dump( 

453 spam, 

454 width=self.get_int_argument("yaml_width", 80, min_=80), 

455 ) 

456 

457 if yaml_subset: 

458 option = ORJSON_OPTIONS 

459 if self.get_bool_argument("pretty", False): 

460 option |= json.OPT_INDENT_2 

461 return lambda spam: json.dumps(spam, option=option) 

462 

463 return lambda spam: spam 

464 

@override
def finish(  # noqa: D102
    self, chunk: None | str | bytes | dict[Any, Any] = None
) -> Future[None]:
    # Rendered HTML gets converted if another representation was
    # negotiated: Markdown, plain text or a JSON object for dynload.
    as_json = self.content_type == "application/vnd.asozial.dynload+json"
    as_plain_text = self.content_type == "text/plain"
    as_markdown = self.content_type == "text/markdown"

    if (
        not isinstance(chunk, bytes | str)
        or self.content_type == "text/html"
        or not self.used_render
        or not (as_json or as_plain_text or as_markdown)
    ):
        # nothing to convert
        return self._finish(chunk)

    chunk = chunk.decode("UTF-8") if isinstance(chunk, bytes) else chunk

    if as_markdown:
        return self._finish(
            f"# {self.title}\n\n"
            + html2text.html2text(chunk, self.request.full_url()).strip()
        )

    soup = BeautifulSoup(chunk, features="lxml")

    if as_plain_text:
        return self._finish(soup.get_text("\n", True))

    dictionary: dict[str, object] = {
        "url": self.fix_url(include_protocol_and_host=True),
        "title": self.title,
        "short_title": (
            self.short_title if self.title != self.short_title else None
        ),
        "body": "".join(
            str(element)
            for element in soup.find_all(name="main")[0].contents
        ).strip(),
        "scripts": [
            {"script": script.string} | script.attrs
            for script in soup.find_all("script")
        ],
        # <link rel="stylesheet"> elements without href are skipped,
        # calling strip() on the missing value would raise
        "stylesheets": [
            href.strip()
            for stylesheet in soup.find_all("link", rel="stylesheet")
            if (href := stylesheet.get("href"))
        ],
        # .string is None for an empty <style>, which join() rejects
        "css": "\n".join(
            style.string or "" for style in soup.find_all("style")
        ),
    }

    return self._finish(dictionary)

finish.__doc__ = _RequestHandler.finish.__doc__

518 

def finish_dict(self, **kwargs: Any) -> Future[None]:
    """Finish the request with the keyword arguments as dictionary."""
    payload: dict[str, Any] = kwargs
    return self.finish(payload)

522 

def fix_url(
    self,
    url: None | str | SplitResult = None,
    new_path: None | str = None,
    include_protocol_and_host: bool | str = False,
    query_args: Mapping[str, None | str | bool | float] | None = None,
) -> str:
    """
    Fix a URL and return it.

    The URL defaults to the URL of the request. URLs with another host
    are returned as they are, or as link to the redirect page if the
    user wants to be asked before leaving and that module is loaded.
    Other URLs get a normalized lowercase path (new_path replaces the
    path) and the user settings as query arguments; settings that are
    already stored elsewhere with the same value are set to None.
    Protocol and host are only kept if include_protocol_and_host is set.
    """
    params = dict(query_args or {})
    del query_args
    if url is None:
        url = self.request.full_url()
    if isinstance(url, str):
        url = urlsplit(url)

    foreign = bool(url.netloc) and (
        url.netloc.lower() != self.request.host.lower()
    )
    if foreign:
        if not (
            self.user_settings.ask_before_leaving
            and self.settings.get("REDIRECT_MODULE_LOADED")
        ):
            return url.geturl()
        # link to the redirect page, query and fragment
        # are then taken from the URL of the request
        path = "/redirect"
        params["to"] = url.geturl()
        url = urlsplit(self.request.full_url())
    elif new_path is None:
        path = url.path
    else:
        path = new_path

    path = f"/{path.strip('/')}".lower()
    if path == "/lolwut":
        path = path.upper()

    if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
        # every option is set to None for these paths
        params.update(dict.fromkeys(self.user_settings.iter_option_names()))
    else:
        current = self.user_settings.as_dict_with_str_values()
        for key, value in current.items():
            params.setdefault(key, value)
        stored = self.user_settings.as_dict_with_str_values(
            include_query_argument=False,
            include_body_argument=self.request.path == "/einstellungen"
            and self.get_bool_argument("save_in_cookie", False),
        )
        for key, value in stored.items():
            if value == params[key]:
                params[key] = None

    base = urlunsplit(
        (
            self.request.protocol,
            self.request.host,
            path,
            url.query,
            url.fragment,
        )
    )
    result = add_args_to_url(base, **params)

    if include_protocol_and_host:
        return result
    return result.removeprefix(
        f"{self.request.protocol}://{self.request.host}"
    )

594 

@classmethod
def get_allowed_methods(cls) -> list[str]:
    """Get the sorted names of the allowed methods."""
    allowed = set(cls.ALLOWED_METHODS)
    # OPTIONS is always allowed
    allowed.add("OPTIONS")
    # HEAD is only offered together with GET
    if "GET" in allowed and cls.supports_head():
        allowed.add("HEAD")
    return sorted(allowed)

602 

def get_bool_argument(
    self,
    name: str,
    default: None | bool = None,
) -> bool:
    """
    Get an argument parsed as boolean.

    Without default the argument is required and has to be a boolean,
    otherwise an HTTPError with status 400 is raised. With default
    the parsing is left to str_to_bool, which gets the default.
    """
    if default is None:
        raw = str(self.get_argument(name))
        try:
            return str_to_bool(raw)
        except ValueError as err:
            raise HTTPError(400, f"{raw} is not a boolean") from err
    return str_to_bool(self.get_argument(name, ""), default)

616 

def get_display_scheme(self) -> ColourScheme:
    """
    Get the scheme currently displayed.

    For "random" the scheme depends on the microsecond of the
    request time: "dark" if it is odd, otherwise "light".
    """
    scheme = self.user_settings.scheme
    if scheme != "random":
        return scheme
    return "dark" if self.now.microsecond & 1 else "light"

623 

def get_display_theme(self) -> str:
    """
    Get the theme currently displayed.

    In December the default theme is replaced with "christmas". For
    "random" one of THEMES except "random" and "christmas" is chosen.
    """
    theme = self.user_settings.theme

    if theme == "default" and self.now.month == 12:
        return "christmas"

    if theme == "random":
        excluded = ("random", "christmas")
        candidates = tuple(name for name in THEMES if name not in excluded)
        return random_choice(candidates)  # nosec: B311

    return theme

639 

def get_error_message(self, **kwargs: Any) -> str:
    """
    Get the error message and return it.

    For exceptions that are no HTTPError the complete traceback is
    returned if the serve_traceback setting is true or the request is
    authorized to see tracebacks, otherwise only the exception itself.
    For a MissingArgumentError its log message is returned and in
    every other case the reason of the response status.
    """
    if "exc_info" in kwargs:
        exc_info = kwargs["exc_info"]
        exc_type = exc_info[0]
        if not issubclass(exc_type, HTTPError):
            if self.settings.get("serve_traceback") or self.is_authorized(
                Permission.TRACEBACK
            ):
                lines = traceback.format_exception(*exc_info)
            else:
                lines = traceback.format_exception_only(*exc_info[:2])
            return "".join(lines).strip()
        if issubclass(exc_type, MissingArgumentError):
            return cast(str, exc_info[1].log_message)
    return str(self._reason)

664 

def get_error_page_description(self, status_code: int) -> str:
    """
    Get the description for the error page.

    404 and 451 have their own text, the other status codes share
    one text per class. Raise ValueError outside of 100 to 599.
    """
    # https://developer.mozilla.org/docs/Web/HTTP/Status
    if not 100 <= status_code <= 599:
        raise ValueError(
            f"{status_code} is not a valid HTTP response status code."
        )
    if status_code == 404:
        return f"{self.request.path} wurde nicht gefunden."
    if status_code == 451:
        return "Hier wäre bestimmt geiler Scheiß."
    by_class = {
        1: "Hier gibt es eine total wichtige Information.",
        2: "Hier ist alles super! 🎶🎶",
        3: "Eine Umleitung ist eingerichtet.",
        4: "Ein Client-Fehler ist aufgetreten.",
        5: "Ein Server-Fehler ist aufgetreten.",
    }
    return by_class[status_code // 100]

686 

687 def get_int_argument( 

688 self, 

689 name: str, 

690 default: None | int = None, 

691 *, 

692 max_: None | int = None, 

693 min_: None | int = None, 

694 ) -> int: 

695 """Get an argument parsed as integer.""" 

696 if default is None: 

697 str_value = self.get_argument(name) 

698 try: 

699 value = int(str_value, base=0) 

700 except ValueError as err: 

701 raise HTTPError(400, f"{str_value} is not an integer") from err 

702 elif self.get_argument(name, ""): 

703 try: 

704 value = int(self.get_argument(name), base=0) 

705 except ValueError: 

706 value = default 

707 else: 

708 value = default 

709 

710 if max_ is not None: 

711 value = min(max_, value) 

712 if min_ is not None: 

713 value = max(min_, value) 

714 

715 return value 

716 

def get_module_infos(self) -> tuple[ModuleInfo, ...]:
    """Get the module infos from the settings (empty if there are none)."""
    infos = self.settings.get("MODULE_INFOS")
    return infos if infos else ()

720 

721 def get_reporting_api_endpoint(self) -> None | str: 

722 """Get the endpoint for the Reporting API™️.""" 

723 if not self.settings.get("REPORTING"): 

724 return None 

725 endpoint = self.settings.get("REPORTING_ENDPOINT") 

726 

727 if not endpoint or not endpoint.startswith("/"): 

728 return endpoint 

729 

730 return f"{self.request.protocol}://{self.request.host}{endpoint}" 

731 

@override
def get_template_namespace(self) -> dict[str, Any]:
    """
    Add useful things to the template namespace and return it.

    They are needed by most of the pages (like title and description).
    The user settings are added first, so the values that are set
    explicitly here win if the names collide.
    """
    namespace = super().get_template_namespace()
    ansi2html = partial(
        Ansi2HTMLConverter(inline=True, scheme="xterm").convert, full=False
    )
    namespace.update(self.user_settings.as_dict())
    namespace.update(
        # calling this with a text runs reduce(apply, functions, text);
        # presumably apply calls each function with the previous result
        ansi2html=partial(
            reduce, apply, (ansi2html, ansi_replace, backspace_replace)
        ),
        apm_script=(
            self.settings["ELASTIC_APM"].get("INLINE_SCRIPT")
            if self.apm_enabled
            else None
        ),
        as_html=self.content_type == "text/html",
        # true on April 1st or if the cookie "c" is truthy
        c=self.now.date() == date(self.now.year, 4, 1)
        or str_to_bool(self.get_cookie("c", "f") or "f", False),
        # DOMAIN is optional (like in redirect_to_canonical_domain),
        # without it the host of the request is used
        canonical_url=self.request.protocol
        + "://"
        + (self.settings.get("DOMAIN") or self.request.host)
        + self.fix_url(
            self.request.full_url().upper()
            if self.request.path.upper().startswith("/LOLWUT")
            else self.request.full_url().lower()
        )
        .split("?")[0]
        .removesuffix("/"),
        description=self.description,
        display_theme=self.get_display_theme(),
        display_scheme=self.get_display_scheme(),
        elastic_rum_url=self.ELASTIC_RUM_URL,
        fix_static=lambda path: self.fix_url(fix_static_path(path)),
        fix_url=self.fix_url,
        emoji2html=(
            emoji2html
            if self.user_settings.openmoji == "img"
            else (
                (lambda emoji: f'<span class="openmoji">{emoji}</span>')
                if self.user_settings.openmoji
                else (lambda emoji: f"<span>{emoji}</span>")
            )
        ),
        form_appendix=self.user_settings.get_form_appendix(),
        GH_ORG_URL=GH_ORG_URL,
        GH_PAGES_URL=GH_PAGES_URL,
        GH_REPO_URL=GH_REPO_URL,
        keywords="Asoziales Netzwerk, Känguru-Chroniken"
        + (
            f", {self.module_info.get_keywords_as_str(self.request.path)}"
            if self.module_info  # type: ignore[truthy-bool]
            else ""
        ),
        lang="de",  # TODO: add language support
        nonce=self.nonce,
        now=self.now,
        openmoji_version=OPENMOJI_VERSION,
        settings=self.settings,
        short_title=self.short_title,
        testing=pytest_is_running(),
        title=self.title,
    )
    # these names are no valid identifiers, so they need a dictionary
    namespace.update(
        {
            # true on Easter Sunday and Easter Monday
            "🥚": timedelta()
            <= self.now.date() - easter(self.now.year)
            < timedelta(days=2),
            "🦘": is_prime(self.now.microsecond),
        }
    )
    return namespace

810 

def get_user_id(self) -> str:
    """
    Get the user id saved in the cookie or create one.

    A new id is a random UUID. The cookie is (re)written with an
    expiry of 90 days if it is missing or older than 30 days.
    """
    stored = self.get_secure_cookie(
        "user_id",
        max_age_days=90,
        min_version=2,
    )
    user_id = stored.decode("UTF-8") if stored else str(uuid.uuid4())

    # this check fails for missing cookies and for cookies older than
    # 30 days, then the id is saved or the expiry date is reset
    recent = self.get_secure_cookie(
        "user_id", max_age_days=30, min_version=2
    )
    if not recent:
        self.set_secure_cookie(
            "user_id",
            user_id,
            expires_days=90,
            path="/",
            samesite="Strict",
        )

    return user_id

833 

def handle_accept_header(  # pylint: disable=inconsistent-return-statements
    self, possible_content_types: tuple[str, ...], strict: bool = True
) -> None:
    """
    Handle the Accept header and set `self.content_type`.

    Nothing happens without possible content types. If none of them
    matches the header (a missing header accepts everything), strict
    handling responds with "not acceptable" and non-strict handling
    falls back to the first possible content type.
    """
    if not possible_content_types:
        return

    accepted = self.request.headers.get("Accept") or "*/*"
    best_match = get_best_match(accepted, possible_content_types)

    if best_match is None and strict:
        return self.handle_not_acceptable(possible_content_types)

    self.content_type = (
        possible_content_types[0] if best_match is None else best_match
    )
    self.set_content_type_header()

850 

def handle_not_acceptable(
    self, possible_content_types: tuple[str, ...]
) -> None:
    """
    Only call this if we cannot respect the Accept header.

    Finish the request with status 406 and a body that
    lists the possible content types, one per line.
    """
    self.clear_header("Content-Type")
    self.set_status(406)
    listing = "\n".join(possible_content_types)
    raise Finish(f"{listing}\n")

858 

859 def head(self, *args: Any, **kwargs: Any) -> None | Awaitable[None]: 

860 """Handle HEAD requests.""" 

861 if self.get.__module__ == "tornado.web": 

862 raise HTTPError(405) 

863 if not self.supports_head(): 

864 raise HTTPError(501) 

865 

866 kwargs["head"] = True 

867 return self.get(*args, **kwargs) 

868 

@override
def initialize(
    self,
    *,
    module_info: ModuleInfo,
    # default is true, because then empty args dicts are
    # enough to specify that the defaults should be used
    default_title: bool = True,
    default_description: bool = True,
) -> None:
    """
    Save the module info and set title and description.

    If default_title is false, title and short title are taken from
    the page info for the path of the request. The same applies to
    the description if default_description is false.
    """
    self.module_info = module_info
    path = self.request.path

    if not default_title:
        page_info = module_info.get_page_info(path)
        self.title = page_info.name
        self.short_title = page_info.short_name or self.title

    if not default_description:
        self.description = module_info.get_page_info(path).description

894 

895 @override 

896 async def options(self, *args: Any, **kwargs: Any) -> None: 

897 """Handle OPTIONS requests.""" 

898 # pylint: disable=unused-argument 

899 self.set_header("Allow", ", ".join(self.get_allowed_methods())) 

900 self.set_status(204) 

901 await self.finish() 

902 

def origin_trial(self, token: bytes | str) -> bool:
    """
    Enable an experimental feature.

    Return whether the feature is active. The token is sent in the
    Origin-Trial header unless its feature already is active, it is
    expired or its origin does not match the URL of the request.
    """
    # pylint: disable=protected-access
    # NOTE(review): the first 69 bytes are presumably version, signature
    # and payload length of the token; they are skipped without a check
    payload = json.loads(b64decode(token)[69:])
    feature = payload["feature"]
    if feature in self.active_origin_trials:
        return True

    origin = urlsplit(payload["origin"])
    url = urlsplit(self.request.full_url())
    if url.port is None and url.scheme in {"http", "https"}:
        # add the default port for the comparison with the origin
        default_port = 443 if url.scheme == "https" else 80
        url = url._replace(netloc=f"{url.hostname}:{default_port}")

    expired = self.request._start_time > payload["expiry"]
    if expired or url.scheme != origin.scheme:
        return False

    if url.netloc != origin.netloc:
        is_subdomain = payload.get("isSubdomain") and url.netloc.endswith(
            f".{origin.netloc}"
        )
        if not is_subdomain:
            return False

    self.add_header("Origin-Trial", token)
    self.active_origin_trials.add(feature)
    return True

927 

@override
async def prepare(self) -> None:
    """
    Prepare the request.

    After the preparation of the parent class (nothing else is done if
    that finished the request) compression is disabled if necessary,
    the Accept header is handled, the cookie "c" is set for some GET
    requests and requests with a too large body get status 413.
    """
    await super().prepare()

    if self._finished:
        return

    if not self.ALLOW_COMPRESSION:
        gzip_transforms = (
            transform
            for transform in self._transforms
            if isinstance(transform, GZipContentEncoding)
        )
        for transform in gzip_transforms:
            # pylint: disable=protected-access
            transform._gzipping = False

    self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)

    if self.request.method == "GET":
        # pseudo random number that is seeded with the request time
        days = Random(self.now.timestamp()).randint(0, 31337)
        if days in {69, 420, 1337, 31337}:
            self.set_cookie("c", "s", expires_days=days / 24, path="/")

    if self.request.method != "OPTIONS" and self.MAX_BODY_SIZE is not None:
        body_size = len(self.request.body)
        if body_size > self.MAX_BODY_SIZE:
            LOGGER.warning(
                "%s > MAX_BODY_SIZE (%s)",
                body_size,
                self.MAX_BODY_SIZE,
            )
            raise HTTPError(413)

965 

@override
def render(  # noqa: D102
    self, template_name: str, **kwargs: Any
) -> Future[None]:
    # finish() only converts the output if a template was rendered
    self.used_render = True
    future = super().render(template_name, **kwargs)
    return future

render.__doc__ = _RequestHandler.render.__doc__

974 

def set_content_type_header(self) -> None:
    """
    Set the Content-Type header based on `self.content_type`.

    Text types get the UTF-8 charset appended and
    without a content type the header is not touched.
    """
    content_type = self.content_type
    if content_type is None:
        return

    if str(content_type).startswith("text/"):  # RFC 2616 (3.7.1)
        header_value = f"{content_type};charset=utf-8"
    else:
        header_value = content_type

    self.set_header("Content-Type", header_value)

983 

@override
def set_cookie(  # noqa: D102  # pylint: disable=too-many-arguments
    self,
    name: str,
    value: str | bytes,
    domain: None | str = None,
    expires: None | float | tuple[int, ...] | datetime = None,
    path: str = "/",
    expires_days: None | float = 400,  # changed
    *,
    secure: bool | None = None,
    httponly: bool = True,
    **kwargs: Any,
) -> None:
    # default for same site should be strict
    kwargs.setdefault("samesite", "Strict")

    if secure is None:
        # cookies are secure if the request was made with HTTPS
        secure = self.request.protocol == "https"

    super().set_cookie(
        name,
        value,
        domain,
        expires,
        path,
        expires_days,
        secure=secure,
        httponly=httponly,
        **kwargs,
    )

set_cookie.__doc__ = _RequestHandler.set_cookie.__doc__

1017 

def set_csp_header(self) -> None:
    """Create a fresh nonce and set the Content-Security-Policy header.

    The nonce is stored in `self.nonce`. When APM is enabled, the
    configured inline script hash and the RUM/APM server URL are added
    to the policy; a `report-uri` directive is appended only if the
    REPORTING setting is truthy.
    """
    self.nonce = secrets.token_urlsafe(16)

    # An empty mapping makes both APM lookups below fall through.
    apm_settings = self.settings["ELASTIC_APM"] if self.apm_enabled else {}

    script_sources = ["'self'", f"'nonce-{self.nonce}'"]
    if "INLINE_SCRIPT_HASH" in apm_settings:
        script_sources += [
            f"'sha256-{apm_settings['INLINE_SCRIPT_HASH']}'",
            "'unsafe-inline'",  # for browsers that don't support hash
        ]

    connect_sources = ["'self'"]
    if "SERVER_URL" in apm_settings:
        rum_server_url = apm_settings.get("RUM_SERVER_URL")
        if rum_server_url:
            # the RUM agent needs to connect to rum_server_url
            connect_sources.append(rum_server_url)
        elif rum_server_url is None:
            # no RUM URL configured: the agent connects to SERVER_URL;
            # any other falsy value adds nothing
            connect_sources.append(apm_settings["SERVER_URL"])

    # fix for older browsers
    websocket_scheme = "wss" if self.request.protocol == "https" else "ws"
    connect_sources.append(f"{websocket_scheme}://{self.request.host}")

    directives = [
        "default-src 'self'",
        f"script-src {' '.join(script_sources)}",
        f"connect-src {' '.join(connect_sources)}",
        "style-src 'self' 'unsafe-inline'",
        "img-src 'self' https://img.zeit.de https://github.asozial.org",
        "frame-ancestors 'self'",
        "sandbox allow-downloads allow-same-origin allow-modals"
        " allow-popups-to-escape-sandbox allow-scripts allow-popups"
        " allow-top-navigation-by-user-activation allow-forms",
        "report-to default",
        "base-uri 'none'",
    ]
    if self.settings.get("REPORTING"):
        directives.append(f"report-uri {self.get_reporting_api_endpoint()}")

    # Every directive, including the last one, is terminated by ";".
    self.set_header(
        "Content-Security-Policy",
        "".join(f"{directive};" for directive in directives),
    )

1070 

@override
def set_default_headers(self) -> None:
    """Set the headers that every response starts out with.

    Covers the CSP header, reporting, CORS, cross-origin isolation and
    client hints, plus headers that depend on the HSTS, ONION_ADDRESS
    and debug settings.
    """
    self.set_csp_header()
    self.active_origin_trials = set()

    if self.settings.get("REPORTING"):
        endpoint = self.get_reporting_api_endpoint()
        self.set_header(
            "Reporting-Endpoints",
            f'default="{endpoint}"',  # noqa: B907
        )
        report_to = {
            "group": "default",
            "max_age": 2592000,
            "endpoints": [{"url": endpoint}],
        }
        self.set_header(
            "Report-To", json.dumps(report_to, option=ORJSON_OPTIONS)
        )
        self.set_header("NEL", '{"report_to":"default","max_age":2592000}')

    for header_name, header_value in (
        ("X-Content-Type-Options", "nosniff"),
        ("Access-Control-Max-Age", "7200"),
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Headers", "*"),
    ):
        self.set_header(header_name, header_value)

    allowed_methods = ", ".join(self.get_allowed_methods())
    self.set_header("Access-Control-Allow-Methods", allowed_methods)

    for header_name, header_value in (
        ("Cross-Origin-Resource-Policy", "cross-origin"),
        (
            "Permissions-Policy",
            "browsing-topics=(),"
            "identity-credentials-get=(),"
            "join-ad-interest-group=(),"
            "private-state-token-issuance=(),"
            "private-state-token-redemption=(),"
            "run-ad-auction=()",
        ),
        ("Referrer-Policy", "same-origin"),
        ("Cross-Origin-Opener-Policy", "same-origin;report-to=default"),
    ):
        self.set_header(header_name, header_value)

    # TODO: improve this
    if self.request.path == "/kaenguru-comics-alt":
        embedder_policy = "credentialless;report-to=default"
    else:
        embedder_policy = "require-corp;report-to=default"
    self.set_header("Cross-Origin-Embedder-Policy", embedder_policy)

    if self.settings.get("HSTS"):
        self.set_header("Strict-Transport-Security", "max-age=63072000")

    # Advertise the onion service unless the request already came via it.
    onion_address = self.settings.get("ONION_ADDRESS")
    if onion_address and not self.request.host_name.endswith(".onion"):
        onion_location = onion_address + self.request.path
        if self.request.query:
            onion_location += f"?{self.request.query}"
        self.set_header("Onion-Location", onion_location)

    if self.settings.get("debug"):
        self.set_header("X-Debug", bool_to_str(True))
        # One header per named permission with the authorization result.
        for permission in Permission:
            if not permission.name:
                continue
            self.set_header(
                f"X-Permission-{permission.name}",
                bool_to_str(bool(self.is_authorized(permission))),
            )

    # Pick the entry based on the microsecond part of the current time.
    clacks_index = int(self.now_utc.microsecond) % len(CLACKS_OVERHEADS)
    self.set_header("X-Clacks-Overhead", CLACKS_OVERHEADS[clacks_index])

    for header_name, header_value in (
        ("Accept-CH", "Sec-CH-Prefers-Reduced-Motion"),
        ("Critical-CH", "Sec-CH-Prefers-Reduced-Motion"),
        ("Vary", "Accept,Authorization,Cookie,Sec-CH-Prefers-Reduced-Motion"),
    ):
        self.set_header(header_name, header_value)

# Replaces the docstring above with the parent's documentation.
set_default_headers.__doc__ = _RequestHandler.set_default_headers.__doc__

1158 

@classmethod
def supports_head(cls) -> bool:
    """Tell whether `cls.get` takes a keyword-only `head` parameter.

    That parameter is what marks a handler as supporting HEAD requests.
    """
    head_parameter = inspect.signature(cls.get).parameters.get("head")
    if head_parameter is None:
        return False
    return head_parameter.kind == inspect.Parameter.KEYWORD_ONLY

1168 

@cached_property
def user_settings(self) -> Options:
    """Return the `Options` object built from this handler.

    The value is created on first access and cached on the instance.
    """
    options = Options(self)
    return options

1173 

@override
def write(self, chunk: str | bytes | dict[str, Any]) -> None:  # noqa: D102
    if self._finished:
        raise RuntimeError("Cannot write() after finish()")

    self.set_content_type_header()

    # Dicts are serialized by self.dump() before being written.
    if isinstance(chunk, dict):
        chunk = self.dump(chunk)

    now = self.now
    if (now.month, now.day) == (4, 27):
        # On April 27, textual output has some words swapped for "Stanley".
        if isinstance(chunk, bytes):
            # Bytes that are not valid UTF-8 are written unchanged.
            with contextlib.suppress(UnicodeDecodeError):
                chunk = chunk.decode("UTF-8")

        if isinstance(chunk, str):
            year_slot = now.year % 5

            def replace_word(match: Any) -> str:
                # Seeding with the word makes the choice repeatable for
                # the same word within a year.
                word: str = match[0]
                if Random(word).randrange(5) == year_slot:
                    return "Stanley"
                return word

            chunk = regex.sub(
                r"\b\p{Lu}\p{Ll}{4}\p{Ll}*\b", replace_word, chunk
            )

    super().write(chunk)

# Reuse the parent's documentation for the override.
write.__doc__ = _RequestHandler.write.__doc__

1202 

@override
def write_error(self, status_code: int, **kwargs: Any) -> None:
    """Send the error response in the negotiated content type.

    HTML clients get the rendered error page, JSON/YAML clients get a
    mapping with status and reason, everything else gets a plain line.
    """
    structured_types: tuple[str, str] = (
        "application/json",
        "application/yaml",
    )
    supported_types: tuple[str, ...] = (
        # text/plain as first (default), to not screw up output in terminals
        "text/plain",
        "text/html",
        "text/markdown",
        *structured_types,
        "application/vnd.asozial.dynload+json",
    )

    if self.content_type not in supported_types:
        # don't send 406, instead default with text/plain
        self.handle_accept_header(supported_types, strict=False)

    reason = self.get_error_message(**kwargs)
    content_type = self.content_type

    if content_type == "text/html":
        description = self.get_error_page_description(status_code)
        # A traceback is only flagged for non-HTTPError exceptions, and
        # only if tracebacks are served or the client is authorized.
        is_traceback = (
            "exc_info" in kwargs
            and not issubclass(kwargs["exc_info"][0], HTTPError)
            and (
                self.settings.get("serve_traceback")
                or self.is_authorized(Permission.TRACEBACK)
            )
        )
        self.render(  # type: ignore[unused-awaitable]
            "error.html",
            status=status_code,
            reason=reason,
            description=description,
            is_traceback=is_traceback,
        )
    elif content_type in structured_types:
        self.finish(  # type: ignore[unused-awaitable]
            {"status": status_code, "reason": reason}
        )
    else:
        self.finish(  # type: ignore[unused-awaitable]
            f"{status_code} {reason}\n"
        )

# Replaces the docstring above with the parent's documentation.
write_error.__doc__ = _RequestHandler.write_error.__doc__