Coverage for an_website/utils/request_handler.py

1# This program is free software: you can redistribute it and/or modify

2# it under the terms of the GNU Affero General Public License as

3# published by the Free Software Foundation, either version 3 of the

4# License, or (at your option) any later version.

6# This program is distributed in the hope that it will be useful,

7# but WITHOUT ANY WARRANTY; without even the implied warranty of

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

9# GNU Affero General Public License for more details.

10#

11# You should have received a copy of the GNU Affero General Public License

12# along with this program. If not, see <https://www.gnu.org/licenses/>.

14"""

15Useful request handlers used by other modules.

17This should only contain request handlers and the get_module_info function.

18"""

20from __future__ import annotations

22import logging

23from collections.abc import Mapping

24from datetime import datetime, timedelta, timezone

25from http.client import responses

26from typing import Any, ClassVar, Final, override

27from urllib.parse import unquote, urlsplit

29import regex

30from tornado.httpclient import AsyncHTTPClient

31from tornado.web import HTTPError

33from .. import CA_BUNDLE_PATH, DIR as ROOT_DIR

34from .base_request_handler import BaseRequestHandler

35from .utils import (

36 SUS_PATHS,

37 get_close_matches,

38 remove_suffix_ignore_case,

39 replace_umlauts,

40)

# Module-level logger, named after this module via ``__name__``.
LOGGER: Final = logging.getLogger(__name__)

class HTMLRequestHandler(BaseRequestHandler):
    """A request handler that serves HTML."""

    # Content types this handler can answer with. Other handlers in this
    # module pass this tuple to ``handle_accept_header``; presumably the base
    # class uses it for Accept header handling (not visible here, confirm).
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = (
        "text/html",
        "text/plain",
        "text/markdown",
        "application/vnd.asozial.dynload+json",
    )

class APIRequestHandler(BaseRequestHandler):
    """The base API request handler."""

    # Content types this handler can answer with. Other handlers in this
    # module pass this tuple to ``handle_accept_header``; presumably the base
    # class uses it for Accept header handling (not visible here, confirm).
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = (
        "application/json",
        "application/yaml",
    )

class NotFoundHandler(BaseRequestHandler):
    """Fallback handler: answer with a 404 page or redirect somewhere useful."""

    @override
    def initialize(self, *args: Any, **kwargs: Any) -> None:
        """Initialize with ``module_info=None`` unless the caller supplied one."""
        kwargs.setdefault("module_info", None)
        super().initialize(*args, **kwargs)

    @override
    async def prepare(self) -> None:
        """Raise a 404 HTTP error or redirect to a better matching page.

        The requested path is cleaned up step by step; as soon as one of the
        steps yields a usable target, the request is redirected there.
        Only GET and HEAD requests are considered for redirects.
        """
        self.now = await self.get_time()

        if self.request.method not in {"GET", "HEAD"}:
            raise HTTPError(404)

        requested = self.request.path

        # collapse repeated slashes, drop trailing ones, use "-" for "_"
        cleaned = regex.sub(r"/+", "/", requested.rstrip("/"))
        cleaned = cleaned.replace("_", "-")

        # strip file-like endings, e.g. "/index.html" before ".html"
        for ext in (".html", ".htm", ".php"):
            for suffix in (f"/index{ext}", ext):
                cleaned = remove_suffix_ignore_case(cleaned, suffix)

        cleaned = replace_umlauts(cleaned)

        if cleaned.lower() in SUS_PATHS:
            self.set_status(469, reason="Nice Try")
            self.write_error(469)
            return

        if cleaned and cleaned != requested:
            self.redirect(self.fix_url(new_path=cleaned), True)
            return

        normalized = unquote(cleaned).strip("/").lower()

        known: Mapping[str, str] = self.settings.get("NORMED_PATHS") or {}

        # exact hit on a known normalized path
        target = known.get(normalized)
        if target:
            self.redirect(self.fix_url(new_path=target), False)
            return

        if len(normalized) <= 1 and requested != "/":
            self.redirect(self.fix_url(new_path="/"))
            return

        # known paths that are a proper prefix of the requested path
        candidates = tuple(
            (prefix, replacement)
            for prefix, replacement in known.items()
            if (
                normalized.startswith(f"{prefix}/")
                and f"/{prefix}" != replacement.lower()
                and prefix != "api"  # api should not be a prefix
            )
        )

        if len(candidates) > 1:
            # ambiguous, so do not guess; fall through to fuzzy matching
            LOGGER.error(
                "Too many prefixes %r for path %s", candidates, requested
            )
        elif candidates:
            prefix, replacement = candidates[0]
            remainder = normalized.removeprefix(prefix)
            self.redirect(
                self.fix_url(new_path=replacement.strip("/") + remainder),
                False,
            )
            return

        closest = get_close_matches(normalized, known, count=1)
        if closest:
            self.redirect(self.fix_url(new_path=known[closest[0]]), False)
            return

        self.set_status(404)
        self.write_error(404)

140

141

class ErrorPage(HTMLRequestHandler):
    """Request handler that renders the error page for a given status code."""

    # The status code that is expected to be returned.
    _success_status: int = 200

    @override
    def clear(self) -> None:
        """Reset headers, content and the expected status of this response."""
        super().clear()
        self._success_status = 200

    @override
    async def get(self, code: str, *, head: bool = False) -> None:
        """Render the error page for the status code given as *code*."""
        # pylint: disable=unused-argument
        status_code = int(code)

        if status_code == 469:
            reason = "Nice Try"
        else:
            reason = responses.get(status_code, "")

        # 1xx, 204 and 304 are never set as the actual response status
        informational = 100 <= status_code < 200
        if not informational and status_code not in (204, 304):
            self.set_status(status_code, reason)
            self._success_status = status_code

        return await self.render(
            "error.html",
            status=status_code,
            reason=reason,
            description=self.get_error_page_description(status_code),
            is_traceback=False,
        )

    @override
    def get_status(self) -> int:
        """Report 200 when the response carries the expected status code.

        Showing the requested error page is a success for this handler, so
        the expected status is reported as 200. This avoids sending error
        logs to APM or Webhooks in case of success.

        This depends on the fact that Tornado internally uses
        self._status_code to set the status code in the response and
        self.get_status() when deciding how to log the request.
        """
        actual = super().get_status()
        return 200 if actual == self._success_status else actual

189

190

class ZeroDivision(BaseRequestHandler):
    """Request handler that deliberately fails with a division by zero."""

    @override
    async def prepare(self) -> None:
        """Handle the Accept header, then divide by zero (except for OPTIONS)."""
        self.now = await self.get_time()
        self.handle_accept_header(self.POSSIBLE_CONTENT_TYPES)
        if self.request.method == "OPTIONS":
            return
        420 / 0  # pylint: disable=pointless-statement

201

202

class ElasticRUM(BaseRequestHandler):
    """A request handler that serves the Elastic RUM Agent.

    Scripts are kept in the class-level ``SCRIPTS`` cache. The cache key is
    the concatenation of the three URL parts passed to ``get`` (for example
    ``"5.12.0"``, ``"5.12.0.min"`` or ``"5.12.0.min.map"``).
    """

    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = (
        "application/javascript",
        "application/json",
        "text/javascript",  # RFC 9239 (6)
    )

    # Download URL template; filled with (version, spam, eggs).
    URL: ClassVar[str] = (
        "https://unpkg.com/@elastic/apm-rum@{}"
        "/dist/bundles/elastic-apm-rum.umd{}.js{}"
    )

    # Cache of script bodies, shared by all instances of this handler.
    SCRIPTS: ClassVar[dict[str, bytes]] = {}

    @override
    async def get(
        self,
        version: str,
        spam: str = "",
        eggs: str = "",
        *,
        head: bool = False,
    ) -> None:
        """Serve the RUM script.

        Args:
            version: The agent version, inserted after ``apm-rum@`` in URL.
            spam: Inserted between ``umd`` and ``.js`` in URL; non-empty
                when the minified script is requested (".min").
            eggs: Appended after ``.js`` in URL; non-empty when the source
                map is requested, which is served as JSON.
            head: If true, nothing is downloaded for a script that is not
                cached (presumably set for HEAD requests by the base
                handler; confirm there).

        Raises:
            HTTPError: With the upstream status code and reason if
                downloading an uncached script does not return 200.
        """
        self.handle_accept_header(
            ("application/json",)
            if eggs
            else ("application/javascript", "text/javascript")
        )

        # pylint: disable=redefined-outer-name
        if (key := version + spam + eggs) not in self.SCRIPTS and not head:
            response = await AsyncHTTPClient().fetch(
                self.URL.format(version, spam, eggs),
                raise_error=False,
                ca_certs=CA_BUNDLE_PATH,
            )
            if response.code != 200:
                raise HTTPError(response.code, reason=response.reason)
            self.SCRIPTS[key] = response.body
            # the upstream may have redirected; use the path it ended up at
            new_path = urlsplit(response.effective_url).path
            if new_path.endswith(".js"):
                BaseRequestHandler.ELASTIC_RUM_URL = new_path
            LOGGER.info("RUM script %s updated", new_path)
            self.redirect(self.fix_url(new_path), False)
            return

        if spam and not eggs:  # if serving minified JS (URL contains ".min")
            self.set_header(
                "SourceMap", self.request.full_url().split("?")[0] + ".map"
            )

        self.set_header(
            "Expires", datetime.now(timezone.utc) + timedelta(days=365)
        )
        self.set_header(
            "Cache-Control",
            f"public, immutable, max-age={60 * 60 * 24 * 365}",
        )

        # With head=True the download above is skipped, so the key can be
        # missing from the cache here. Fall back to an empty body instead of
        # failing with a KeyError.
        return await self.finish(self.SCRIPTS.get(key) or b"")

266

267

# Preload the vendored copies of the RUM agent into the script cache.
# A generator is used so that no temporary names are left in the module.
ElasticRUM.SCRIPTS.update(
    (
        cache_key,
        (ROOT_DIR / "vendored" / "apm-rum" / file_name).read_bytes(),
    )
    for cache_key, file_name in (
        ("5.12.0", "elastic-apm-rum.umd.js"),
        ("5.12.0.min", "elastic-apm-rum.umd.min.js"),
        ("5.12.0.min.map", "elastic-apm-rum.umd.min.js.map"),
    )
)

Coverage for an_website/utils/request_handler.py: 85.586%

111 statements