Coverage for an_website/patches/__init__.py: 88.623%

167 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-22 15:59 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13# pylint: disable=protected-access 

14 

15"""Patches that improve everything.""" 

16 

17from __future__ import annotations 

18 

19import asyncio 

20import http.client 

21import json as stdlib_json # pylint: disable=preferred-module 

22import logging 

23import os 

24import sys 

25from collections.abc import Callable 

26from configparser import RawConfigParser 

27from contextlib import suppress 

28from importlib import import_module 

29from pathlib import Path 

30from threading import Thread 

31from types import MethodType 

32from typing import Any 

33from urllib.parse import urlsplit 

34 

35import certifi 

36import defusedxml # type: ignore[import-untyped] 

37import jsonpickle # type: ignore[import-untyped] 

38import orjson 

39import pycurl 

40import tornado.httputil 

41import yaml 

42from emoji import EMOJI_DATA 

43from pillow_jxl import JpegXLImagePlugin # noqa: F401 

44from setproctitle import setthreadtitle 

45from tornado.httpclient import AsyncHTTPClient, HTTPRequest 

46from tornado.httputil import HTTPFile, HTTPHeaders, HTTPServerRequest 

47from tornado.log import gen_log 

48from tornado.web import GZipContentEncoding, RedirectHandler, RequestHandler 

49 

50from .. import CA_BUNDLE_PATH, MEDIA_TYPES 

51from . import braille, json # noqa: F401 # pylint: disable=reimported 

52 

53 

def apply() -> None:
    """Run every patch in this module, one after another.

    The general-purpose patches run first, followed by the Tornado ones.
    """
    patches = (
        patch_asyncio,
        patch_certifi,
        patch_configparser,
        patch_emoji,
        patch_http,
        patch_json,
        patch_jsonpickle,
        patch_threading,
        patch_xml,
        # Tornado-specific patches
        patch_tornado_418,
        patch_tornado_arguments,
        patch_tornado_gzip,
        patch_tornado_httpclient,
        patch_tornado_logs,
        patch_tornado_redirect,
    )
    for patch in patches:
        patch()

72 

73 

def patch_asyncio() -> None:
    """Install uvloop's event loop policy when uvloop can be imported.

    The patch is skipped when the ``DISABLE_UVLOOP`` environment variable
    is set to one of ``y``, ``yes``, ``t``, ``true``, ``on`` or ``1``.
    """
    opt_out_values = {"y", "yes", "t", "true", "on", "1"}  # fmt: skip
    if os.environ.get("DISABLE_UVLOOP") in opt_out_values:
        return
    try:
        asyncio.set_event_loop_policy(
            import_module("uvloop").EventLoopPolicy()
        )
    except ModuleNotFoundError:
        # uvloop is optional: a missing module is silently ignored.
        pass

83 

84 

def patch_certifi() -> None:
    """Replace certifi's lookup functions so they use ``CA_BUNDLE_PATH``."""

    def where() -> Any:
        """Return the configured CA bundle path."""
        return CA_BUNDLE_PATH

    def contents() -> str:
        """Return the text of the bundle named by ``certifi.where()``."""
        # Resolved through certifi at call time, so whatever function is
        # installed as ``certifi.where`` at that moment is the one used.
        bundle = Path(certifi.where())
        return bundle.read_text("ASCII")

    certifi.where = where
    certifi.contents = contents

89 

90 

def patch_configparser() -> None:
    """Register six extra boolean spellings with configparser.

    ``sure``, ``accept`` and ``enabled`` become true values;
    ``nope``, ``reject`` and ``disabled`` become false values.
    """
    states = RawConfigParser.BOOLEAN_STATES  # type: ignore[attr-defined]
    for truthy_word, falsy_word in (
        ("sure", "nope"),
        ("accept", "reject"),
        ("enabled", "disabled"),
    ):
        states[truthy_word] = True
        states[falsy_word] = False

103 

104 

def patch_emoji() -> None:
    """Register custom zero-width-joiner sequences in ``EMOJI_DATA``.

    One "hacker cat" entry is added, plus a jar, a flag and a "deep" flag
    for each of seven coloured squares. Every entry carries a German
    (``de``) and an English (``en``) name.
    """
    # (German stem, English name, coloured square)
    coloured_squares = (
        ("rot", "red", "🟥"),
        ("blau", "blue", "🟦"),
        ("orang", "orange", "🟧"),
        ("gelb", "yellow", "🟨"),
        ("grün", "green", "🟩"),
        ("lilan", "purple", "🟪"),
        ("braun", "brown", "🟫"),
    )

    hacker_cat = {
        "de": ":hacker_katze:",
        "en": ":hacker_cat:",
        "status": 2,
        "E": 1,
    }
    EMOJI_DATA["🐱\u200D💻"] = hacker_cat

    for de_name, en_name, rect in coloured_squares:
        jar = {
            "de": f":{de_name}es_glas:",
            "en": f":{en_name}_jar:",
            "status": 2,
            "E": 14,
        }
        flag = {
            "de": f":{de_name}e_flagge:",
            "en": f":{en_name}_flag:",
            "status": 2,
            "E": 11,
        }
        deep_flag = {
            "de": f":tief{de_name}e_flagge:",
            "en": f":deep_{en_name}_flag:",
            "status": 2,
            "E": 11,
        }
        EMOJI_DATA[f"🫙\u200D{rect}"] = jar
        EMOJI_DATA[f"🏳\uFE0F\u200D{rect}"] = flag
        EMOJI_DATA[f"\u2691\uFE0F\u200D{rect}"] = deep_flag

140 

141 

def patch_http() -> None:
    """Register a reason phrase for status code 420 in ``http.client``."""
    extra_responses = {420: "Enhance Your Calm"}
    http.client.responses.update(extra_responses)

145 

146 

def patch_json() -> None:
    """Swap the stdlib json entry points for this package's versions.

    ``dumps``, ``dump``, ``loads`` and ``load`` on the standard library
    module are replaced by the functions of the same name from the
    package's own ``json`` module. Nothing happens when the stdlib module
    has a truthy ``_omegajson`` attribute or on Python older than 3.12.
    """
    if getattr(stdlib_json, "_omegajson", False):
        return
    if sys.version_info < (3, 12):
        return
    for function_name in ("dumps", "dump", "loads", "load"):
        setattr(stdlib_json, function_name, getattr(json, function_name))

155 

156 

def patch_jsonpickle() -> None:
    """Load orjson as jsonpickle's preferred backend, without fallthrough."""
    backend_name = "orjson"
    jsonpickle.load_backend(backend_name)
    jsonpickle.set_preferred_backend(backend_name)
    jsonpickle.enable_fallthrough(False)

162 

163 

def patch_threading() -> None:
    """Wrap ``Thread._bootstrap`` so each thread's title is set to its name."""
    original_bootstrap = Thread._bootstrap  # type: ignore[attr-defined]

    def bootstrap(self: Thread) -> None:
        """Set the thread title, then hand over to the original bootstrap."""
        try:
            setthreadtitle(self.name)
        except Exception:  # pylint: disable=broad-except
            # Setting the title is best effort; any failure is ignored so
            # the original bootstrap is still called.
            pass
        original_bootstrap(self)

    Thread._bootstrap = bootstrap  # type: ignore[attr-defined]

174 

175 

def patch_tornado_418() -> None:
    """Add the PROPFIND, BREW and WHEN methods to ``RequestHandler``.

    The method names are appended to ``SUPPORTED_METHODS`` and each one is
    mapped to ``RequestHandler._unimplemented_method`` on the class.
    """
    extra_methods = ("PROPFIND", "BREW", "WHEN")
    RequestHandler.SUPPORTED_METHODS += extra_methods  # type: ignore[assignment]
    fallback = RequestHandler._unimplemented_method
    for method in extra_methods:
        # Handlers are looked up by the lower-case method name.
        setattr(RequestHandler, method.lower(), fallback)

187 

188 

def patch_tornado_arguments() -> None:  # noqa: C901
    """Make Tornado read arguments from JSON and YAML request bodies.

    ``tornado.httputil.parse_body_arguments`` is replaced by a wrapper that
    handles ``application/json`` and ``application/yaml`` itself and passes
    every other content type to the original function.
    """
    # pylint: disable=too-complex

    def ensure_bytes(value: Any) -> bytes:
        """Return the value as bytes."""
        if isinstance(value, bytes):
            return value
        if isinstance(value, bool):
            return b"true" if value else b"false"
        return str(value).encode("UTF-8")

    def merge_decoded_body(
        body: bytes,
        arguments: dict[str, list[bytes]],
        headers: None | HTTPHeaders,
        decode: Callable[[bytes], Any],
        error_message: str,
    ) -> None:
        """Decode the body and append its top-level items to the arguments."""
        if headers and "Content-Encoding" in headers:
            gen_log.warning(
                "Unsupported Content-Encoding: %s",
                headers["Content-Encoding"],
            )
            return
        try:
            decoded = decode(body)
        except Exception as exc:  # pylint: disable=broad-except
            gen_log.warning(error_message, exc)
            return
        # Only a top-level mapping can be turned into arguments.
        if not isinstance(decoded, dict):
            return
        for key, value in decoded.items():
            if value is None:
                continue
            arguments.setdefault(key, []).append(ensure_bytes(value))

    def parse_body_arguments(
        content_type: str,
        body: bytes,
        arguments: dict[str, list[bytes]],
        files: dict[str, list[HTTPFile]],
        headers: None | HTTPHeaders = None,
        *,
        _: Callable[..., None] = tornado.httputil.parse_body_arguments,
    ) -> None:
        if content_type.startswith("application/json"):
            merge_decoded_body(
                body, arguments, headers, orjson.loads, "Invalid JSON body: %s"
            )
        elif content_type.startswith("application/yaml"):
            merge_decoded_body(
                body, arguments, headers, yaml.safe_load, "Invalid YAML body: %s"
            )
        else:
            # ``_`` is the original Tornado function, bound as a default
            # when this wrapper was defined.
            _(content_type, body, arguments, files, headers)

    parse_body_arguments.__doc__ = tornado.httputil.parse_body_arguments.__doc__

    tornado.httputil.parse_body_arguments = parse_body_arguments

255 

256 

def patch_tornado_gzip() -> None:
    """Let Tornado gzip every media type marked as compressible.

    ``GZipContentEncoding.CONTENT_TYPES`` is replaced by the set of
    ``MEDIA_TYPES`` keys whose entry has a truthy ``compressible`` value.
    """
    GZipContentEncoding.CONTENT_TYPES = {
        media_type
        for media_type, info in MEDIA_TYPES.items()
        if info.get("compressible")
    }

262 

263 

def patch_tornado_httpclient() -> None:  # fmt: off
    """Use the curl HTTP client and ask for HTTP/3 on HTTPS requests.

    ``AsyncHTTPClient`` is configured to use ``CurlAsyncHTTPClient`` and
    ``HTTPRequest.__init__`` is wrapped so that requests get a default
    ``prepare_curl_callback`` unless the caller supplied one.
    """
    # NOTE(review): presumably libcurl's version number for 7.88.0 and the
    # CURL_VERSION_HTTP3 feature bit - confirm against the libcurl docs.
    min_version_num = 0x75800
    http3_feature_bit = 1 << 25

    AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")

    def prepare_curl_callback(self: HTTPRequest, curl: pycurl.Curl) -> None:
        """Select HTTP/3 for HTTPS URLs when the curl build allows it."""
        # pylint: disable=c-extension-no-member, useless-suppression
        if urlsplit(self.url).scheme != "https":
            return
        info = pycurl.version_info()
        # NOTE(review): index 2 is presumably the numeric version and
        # index 4 the feature bitmask - confirm against the pycurl docs.
        if info[2] >= min_version_num and info[4] & http3_feature_bit:
            curl.setopt(pycurl.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_3)

    original_request_init = HTTPRequest.__init__

    def request_init(self: HTTPRequest, *args: Any, **kwargs: Any) -> None:
        # With 18 or more positional arguments the keyword is left alone;
        # presumably the callback was then passed positionally - confirm
        # against the HTTPRequest signature.
        if len(args) < 18:
            kwargs.setdefault(
                "prepare_curl_callback",
                MethodType(prepare_curl_callback, self),
            )
        original_request_init(self, *args, **kwargs)

    request_init.__doc__ = original_request_init.__doc__

    HTTPRequest.__init__ = request_init  # type: ignore[method-assign]

288 

289 

def patch_tornado_logs() -> None:
    """Keep full client IP addresses out of Tornado's log output.

    ``RequestHandler._request_summary`` is replaced so the remote IP is
    passed through ``anonymize_ip``, except for requests to ``/robots.txt``
    or to a path listed in ``SUS_PATHS``. ``HTTPServerRequest.__repr__`` is
    replaced so it shows only protocol, host, method, URI and version.
    """
    # pylint: disable=import-outside-toplevel
    from ..utils.utils import SUS_PATHS, anonymize_ip

    def request_summary(self: RequestHandler) -> str:
        """Return ``"<method> <uri> (<ip>)"`` for the current request."""
        request = self.request
        path = request.path
        if path == "/robots.txt" or path.lower() in SUS_PATHS:
            # These requests are logged with the unmodified address.
            client = request.remote_ip
        else:
            client = anonymize_ip(request.remote_ip, ignore_invalid=True)
        # pylint: disable-next=consider-using-f-string
        return "%s %s (%s)" % (request.method, request.uri, client)

    def request_repr(self: HTTPServerRequest) -> str:
        """Return a repr that lists a fixed set of five attributes."""
        shown = ("protocol", "host", "method", "uri", "version")
        pairs = [
            "%s=%r" % (name, getattr(self, name))  # pylint: disable=consider-using-f-string
            for name in shown
        ]
        # pylint: disable-next=consider-using-f-string
        return "%s(%s)" % (self.__class__.__name__, ", ".join(pairs))

    RequestHandler._request_summary = request_summary  # type: ignore[method-assign]

    HTTPServerRequest.__repr__ = request_repr  # type: ignore[method-assign]

325 

326 

def patch_tornado_redirect() -> None:
    """Default redirects to 307/308 and let ``RedirectHandler`` answer HEAD.

    ``RequestHandler.redirect`` is replaced by a version that uses status
    308 for permanent and 307 for other redirects when no status is given,
    and that logs a critical message when the target equals the full URL
    of the current request. ``RedirectHandler.head`` becomes an alias of
    ``RedirectHandler.get``.
    """

    def redirect(
        self: RequestHandler,
        url: str,
        permanent: bool = False,
        status: None | int = None,
    ) -> None:
        if url == self.request.full_url():
            handler_class = self.__class__
            handler_logger = logging.getLogger(
                f"{handler_class.__module__}.{handler_class.__qualname__}"
            )
            # The loop is only reported; the redirect is still sent below.
            handler_logger.critical("Infinite redirect to %r detected", url)
        if self._headers_written:
            # pylint: disable=broad-exception-raised
            raise Exception("Cannot redirect after headers have been written")
        if status is not None:
            assert isinstance(status, int) and 300 <= status <= 399  # type: ignore[redundant-expr]  # noqa: B950
        elif permanent:
            status = 308
        else:
            status = 307
        self.set_status(status)
        self.set_header("Location", url)
        self.finish()  # type: ignore[unused-awaitable]

    upstream_doc = RequestHandler.redirect.__doc__
    if upstream_doc:
        # Reuse the upstream docstring with the status codes swapped for
        # the ones used here.
        redirect.__doc__ = upstream_doc.replace("301", "308").replace(
            "302", "307"
        )

    RequestHandler.redirect = redirect  # type: ignore[method-assign]

    RedirectHandler.head = RedirectHandler.get

363 

364 

def patch_xml() -> None:
    """Apply defusedxml's patches to the stdlib XML and xmlrpc modules."""
    defusedxml.defuse_stdlib()
    # Keep this after defuse_stdlib(): the module is only imported as plain
    # ``defusedxml`` here, so the ``xmlrpc`` attribute presumably exists only
    # once that call has imported the submodule - TODO confirm.
    xmlrpc_patches = defusedxml.xmlrpc
    xmlrpc_patches.monkey_patch()

368 defusedxml.xmlrpc.monkey_patch()