Coverage for an_website/utils/request_handler.py: 85.586%

111 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-16 19:56 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13 

14""" 

15Useful request handlers used by other modules. 

16 

17This should only contain request handlers and the get_module_info function. 

18""" 

19 

20from __future__ import annotations 

21 

22import logging 

23from collections.abc import Mapping 

24from datetime import datetime, timedelta, timezone 

25from http.client import responses 

26from typing import Any, ClassVar, Final, override 

27from urllib.parse import unquote, urlsplit 

28 

29import regex 

30from tornado.httpclient import AsyncHTTPClient 

31from tornado.web import HTTPError 

32 

33from .. import CA_BUNDLE_PATH, DIR as ROOT_DIR 

34from .base_request_handler import BaseRequestHandler 

35from .utils import ( 

36 SUS_PATHS, 

37 get_close_matches, 

38 remove_suffix_ignore_case, 

39 replace_umlauts, 

40) 

41 

# Module-level logger, named after this module's import path.
LOGGER: Final = logging.getLogger(__name__)

43 

44 

class HTMLRequestHandler(BaseRequestHandler):
    """Base class for request handlers that serve HTML pages."""

    # Content types a handler of this kind can answer with.
    # The order of the entries is kept exactly as declared.
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = (
        "text/html",
        "text/plain",
        "text/markdown",
        "application/vnd.asozial.dynload+json",
    )

54 

55 

class APIRequestHandler(BaseRequestHandler):
    """Base class for all API request handlers."""

    # Content types an API handler can answer with.
    # The order of the entries is kept exactly as declared.
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = (
        "application/json",
        "application/yaml",
    )

63 

64 

class NotFoundHandler(BaseRequestHandler):
    """Fallback handler used when no other RequestHandler matches.

    It tries to redirect the client to a path that does exist and shows
    a 404 page if no such path can be found.
    """

    @override
    def initialize(self, *args: Any, **kwargs: Any) -> None:
        """Initialize the handler, defaulting ``module_info`` to ``None``.

        Without module info the default title and description are used.
        """
        kwargs.setdefault("module_info", None)
        super().initialize(*args, **kwargs)

    @override
    async def prepare(self) -> None:
        """Redirect to a better matching path or respond with an error.

        The checks run in this order:

        1. Methods other than GET and HEAD raise a 404 ``HTTPError``.
        2. The path is cleaned up (duplicate and trailing slashes,
           underscores, ``index``/``.html``/``.htm``/``.php`` suffixes,
           umlauts).
        3. Paths listed in ``SUS_PATHS`` get status 469 ("Nice Try").
        4. If the clean-up changed the path, redirect permanently to it.
        5. An exact entry in the ``NORMED_PATHS`` setting wins.
        6. An (almost) empty path redirects to ``/``.
        7. A single matching prefix from ``NORMED_PATHS`` is replaced.
        8. The closest match from ``NORMED_PATHS`` is used.
        9. Otherwise the 404 error page is written.
        """
        self.now = await self.get_time()

        if self.request.method not in {"GET", "HEAD"}:
            raise HTTPError(404)

        requested_path = self.request.path

        # collapse runs of slashes, drop trailing slashes, prefer dashes
        collapsed = regex.sub(r"/+", "/", requested_path.rstrip("/"))
        new_path = collapsed.replace("_", "-")

        for ext in (".html", ".htm", ".php"):
            new_path = remove_suffix_ignore_case(new_path, f"/index{ext}")
            new_path = remove_suffix_ignore_case(new_path, ext)

        new_path = replace_umlauts(new_path)

        if new_path.lower() in SUS_PATHS:
            self.set_status(469, reason="Nice Try")
            return self.write_error(469)

        path_was_changed = new_path != requested_path
        if new_path and path_was_changed:
            # permanent redirect to the cleaned-up path
            return self.redirect(self.fix_url(new_path=new_path), True)

        this_path_normalized = unquote(new_path).strip("/").lower()

        paths: Mapping[str, str] = self.settings.get("NORMED_PATHS") or {}

        exact_match = paths.get(this_path_normalized)
        if exact_match:
            return self.redirect(self.fix_url(new_path=exact_match), False)

        if len(this_path_normalized) <= 1 and requested_path != "/":
            return self.redirect(self.fix_url(new_path="/"))

        # known paths that are a proper prefix of the requested path and
        # whose target differs from the prefix itself
        prefixes = tuple(
            (candidate, target)
            for candidate, target in paths.items()
            if (
                this_path_normalized.startswith(f"{candidate}/")
                and f"/{candidate}" != target.lower()
                and candidate != "api"  # api should not be a prefix
            )
        )

        if len(prefixes) == 1:
            prefix, replacement = prefixes[0]
            remainder = this_path_normalized.removeprefix(prefix)
            return self.redirect(
                self.fix_url(new_path=f"{replacement.strip('/')}{remainder}"),
                False,
            )
        if prefixes:
            # more than one candidate: ambiguous, so do not guess
            LOGGER.error(
                "Too many prefixes %r for path %s", prefixes, requested_path
            )

        if matches := get_close_matches(this_path_normalized, paths, count=1):
            closest = matches[0]
            return self.redirect(self.fix_url(new_path=paths[closest]), False)

        self.set_status(404)
        self.write_error(404)

140 

141 

class ErrorPage(HTMLRequestHandler):
    """A request handler that renders the error page for a status code."""

    # The status code that is expected to be returned.
    _success_status: int = 200

    @override
    def clear(self) -> None:
        """Reset headers, content and the expected status code."""
        super().clear()
        self._success_status = 200

    @override
    async def get(self, code: str, *, head: bool = False) -> None:
        """Render the error page for the given status code.

        ``code`` is converted with ``int()``; the page is rendered with the
        matching reason phrase and description.
        """
        # pylint: disable=unused-argument
        status_code = int(code)

        if status_code == 469:
            reason = "Nice Try"
        else:
            reason = responses.get(status_code, "")

        # set the status code if it is allowed
        # (not for 1xx, 204 and 304)
        forbidden = status_code in (204, 304) or 100 <= status_code < 200
        if not forbidden:
            self.set_status(status_code, reason)
            self._success_status = status_code

        return await self.render(
            "error.html",
            status=status_code,
            reason=reason,
            description=self.get_error_page_description(status_code),
            is_traceback=False,
        )

    @override
    def get_status(self) -> int:
        """Report 200 when the status code is the expected one.

        This avoids sending error logs to APM or Webhooks in case of success.

        This depends on the fact that Tornado internally uses
        self._status_code to set the status code in the response and
        self.get_status() when deciding how to log the request.
        """
        actual = super().get_status()
        return 200 if actual == self._success_status else actual

189 

190 

class ZeroDivision(BaseRequestHandler):
    """A request handler that deliberately raises an error."""

    @override
    async def prepare(self) -> None:
        """Raise a ZeroDivisionError for every method except OPTIONS."""
        self.now = await self.get_time()
        self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)

        if self.request.method == "OPTIONS":
            return

        # the division exists only for the exception it raises
        420 / 0  # pylint: disable=pointless-statement

201 

202 

class ElasticRUM(BaseRequestHandler):
    """A request handler that serves the Elastic RUM Agent.

    Scripts are cached in ``SCRIPTS`` for the lifetime of the process.
    A script that is not cached yet is fetched from unpkg on the first
    non-HEAD request for it.
    """

    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = (
        "application/javascript",
        "application/json",
        "text/javascript",  # RFC 9239 (6)
    )

    # Template for the upstream URL, filled with (version, spam, eggs).
    URL: ClassVar[str] = (
        "https://unpkg.com/@elastic/apm-rum@{}"
        "/dist/bundles/elastic-apm-rum.umd{}.js{}"
    )

    # Cache of script bodies, keyed by ``version + spam + eggs``.
    SCRIPTS: ClassVar[dict[str, bytes]] = {}

    @override
    async def get(
        self,
        version: str,
        spam: str = "",
        eggs: str = "",
        *,
        head: bool = False,
    ) -> None:
        """Serve the RUM script.

        Args:
            version: The version of the RUM agent, inserted into ``URL``.
            spam: Inserted between ``umd`` and ``.js`` in ``URL``;
                non-empty when minified JS is requested (presumably
                ``".min"`` -- the route pattern is not visible here).
            eggs: Inserted after ``.js`` in ``URL``; when non-empty the
                response is negotiated as JSON (presumably ``".map"``
                for source maps -- the route pattern is not visible here).
            head: Whether this is a HEAD request. HEAD requests never
                trigger a fetch from upstream.

        Raises:
            HTTPError: If upstream did not answer the fetch with 200.
        """
        self.handle_accept_header(
            ("application/json",)
            if eggs
            else ("application/javascript", "text/javascript")
        )

        # pylint: disable=redefined-outer-name
        if (key := version + spam + eggs) not in self.SCRIPTS and not head:
            response = await AsyncHTTPClient().fetch(
                self.URL.format(version, spam, eggs),
                raise_error=False,
                ca_certs=CA_BUNDLE_PATH,
            )
            if response.code != 200:
                raise HTTPError(response.code, reason=response.reason)
            self.SCRIPTS[key] = response.body
            # upstream may have redirected (e.g. to a concrete version)
            new_path = urlsplit(response.effective_url).path
            if new_path.endswith(".js"):
                BaseRequestHandler.ELASTIC_RUM_URL = new_path
                LOGGER.info("RUM script %s updated", new_path)
                self.redirect(self.fix_url(new_path), False)
                return

        if spam and not eggs:  # if serving minified JS (URL contains ".min")
            self.set_header(
                "SourceMap", self.request.full_url().split("?")[0] + ".map"
            )

        self.set_header(
            "Expires", datetime.now(timezone.utc) + timedelta(days=365)
        )
        self.set_header(
            "Cache-Control",
            f"public, immutable, max-age={60 * 60 * 24 * 365}",
        )

        # For a HEAD request the fetch above is skipped, so the key may be
        # missing from the cache; indexing with [] would raise a KeyError
        # (and thus respond with a 500) in that case.
        return await self.finish(self.SCRIPTS.get(key) or b"")

266 

267 

# Preload the vendored RUM bundles into the script cache at import time.
# A generator expression is used so that no loop variables are left
# behind in the module namespace.
ElasticRUM.SCRIPTS.update(
    (
        script_key,
        (ROOT_DIR / "vendored" / "apm-rum" / file_name).read_bytes(),
    )
    for script_key, file_name in (
        ("5.12.0", "elastic-apm-rum.umd.js"),
        ("5.12.0.min", "elastic-apm-rum.umd.min.js"),
        ("5.12.0.min.map", "elastic-apm-rum.umd.min.js.map"),
    )
)