Coverage for an_website / reporting / reporting.py: 30.337%
89 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 17:35 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
14"""The Reporting API™️ of the website."""
16import logging
17from datetime import timedelta
18from typing import Any, ClassVar, Final, cast
20import orjson as json
21from elasticsearch import AsyncElasticsearch
22from elasticsearch.exceptions import NotFoundError
23from elasticsearch.helpers import async_bulk
24from tornado.web import HTTPError
26from .. import EVENT_ELASTICSEARCH, ORJSON_OPTIONS
27from ..utils.request_handler import APIRequestHandler
28from ..utils.utils import ModuleInfo, Permission
30LOGGER: Final = logging.getLogger(__name__)
def get_module_info() -> ModuleInfo:
    """Create and return the ModuleInfo for this module."""
    description = (
        "Die Reporting API™️ kann zur Überwachung von "
        "Sicherheits-Verstößen, veralteten API-Aufrufen und mehr "
        "von Seiten des Asozialen Netzwerks genutzt werden.\n"
        "Bei Interesse kontakten Sie bitte das Gürteltier."
    )
    return ModuleInfo(
        name="Reporting API™️",
        description=description,
        handlers=((r"/api/reports", ReportingAPI),),
        path="/api/reports",
        hidden=True,
    )
async def get_reports(  # pylint: disable=too-many-arguments
    elasticsearch: AsyncElasticsearch,
    prefix: str,
    domain: None | str = None,
    type_: None | str = None,
    from_: int = 0,
    size: int = 10,
) -> list[dict[str, Any]]:
    """Get the reports from Elasticsearch."""
    # flags shared by both simple_query_string clauses
    sqs_flags = "AND|ESCAPE|NOT|OR|PHRASE|PRECEDENCE|WHITESPACE"

    # only reports from the last month
    filters: list[dict[str, dict[str, Any]]] = [
        {"range": {"@timestamp": {"gte": "now-1M"}}}
    ]
    if domain:
        filters.append(
            {
                "simple_query_string": {
                    "query": domain,
                    "fields": ["url.domain"],
                    "flags": sqs_flags,
                }
            }
        )
    if type_:
        filters.append(
            {
                "simple_query_string": {
                    "query": type_,
                    "fields": ["type"],
                    "flags": sqs_flags,
                }
            }
        )

    # exclude known-noise reports
    must_not: list[dict[str, dict[str, Any]]] = [
        {
            "bool": {
                "filter": [
                    {"term": {"type": {"value": "network-error"}}},
                    {"term": {"body.type": {"value": "abandoned"}}},
                ]
            }
        },
        {
            "bool": {
                "filter": [
                    {"term": {"type": {"value": "csp-violation"}}},
                    {"term": {"body.source-file": {"value": "moz-extension"}}},
                ]
            }
        },
    ]

    response = await elasticsearch.search(
        index=f"{prefix}-reports",
        sort=[{"@timestamp": {"order": "desc"}}],
        query={"bool": {"filter": filters, "must_not": must_not}},
        from_=from_,
        size=size,
    )
    return [hit["_source"] for hit in response["hits"]["hits"]]
class ReportingAPI(APIRequestHandler):
    """The request handler for the Reporting API™️."""

    # accept NDJSON output in addition to the base handler's content types
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = (
        APIRequestHandler.POSSIBLE_CONTENT_TYPES + ("application/x-ndjson",)
    )

    # rate-limit settings consumed by the base handler
    # (exact period semantics defined in APIRequestHandler, not visible here)
    RATELIMIT_GET_LIMIT: ClassVar[int] = 20
    RATELIMIT_GET_COUNT_PER_PERIOD: ClassVar[int] = 2

    RATELIMIT_POST_LIMIT: ClassVar[int] = 20
    RATELIMIT_POST_COUNT_PER_PERIOD: ClassVar[int] = 2

    # allow large POST bodies (100 MB); browsers may batch many reports
    MAX_BODY_SIZE: ClassVar[int] = 100_000_000

    # hard cap on the number of reports accepted in a single POST request
    MAX_REPORTS_PER_REQUEST: ClassVar[int] = 1000

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the Reporting API™️.

        Supports the query arguments "domain", "type", "from" and "size".
        Responds with the matching reports either as NDJSON (one JSON
        object per line) or via the handler's default serialization.
        """
        # reports live in Elasticsearch; without it we can't serve anything
        if not EVENT_ELASTICSEARCH.is_set():
            raise HTTPError(503)

        if head:
            return

        domain = self.get_argument("domain", None)
        type_ = self.get_argument("type", None)
        from_ = self.get_int_argument("from", 0, min_=0)
        size = self.get_int_argument("size", 10, min_=0)

        # without the REPORTING permission: no pagination offset,
        # and the page size is capped at 1000
        if not self.is_authorized(Permission.REPORTING):
            from_ = 0
            size = min(1000, size)

        try:
            reports = await get_reports(
                self.elasticsearch,
                self.elasticsearch_prefix,
                domain,
                type_,
                from_,
                size,
            )
        except NotFoundError:  # data stream doesn't exist
            raise HTTPError(404) from None

        if self.content_type == "application/x-ndjson":
            # newline-delimited JSON: one report object per line
            await self.finish(
                b"\n".join(
                    json.dumps(report, option=ORJSON_OPTIONS)
                    for report in reports
                )
            )
        else:
            await self.finish(self.dump(reports))

    async def post(self) -> None:
        """Handle POST requests to the Reporting API™️.

        Accepts either a list of Reporting API reports
        ("application/reports+json") or a single legacy CSP report
        ("application/csp-report"), normalizes them, and indexes the
        valid ones into Elasticsearch.
        """
        # pylint: disable=too-complex, too-many-branches
        if not (
            self.settings.get("REPORTING_BUILTIN")
            and EVENT_ELASTICSEARCH.is_set()
        ):
            raise HTTPError(503)
        if self.request.headers.get("Content-Type", "").startswith(
            "application/reports+json"
        ):
            # Reporting API format: the body is already a list of reports
            reports = json.loads(self.request.body)
        elif self.request.headers.get("Content-Type", "").startswith(
            "application/csp-report"
        ):
            # legacy CSP format: one report wrapped in {"csp-report": {...}}
            # with kebab-case field names
            data = json.loads(self.request.body)
            if not isinstance(data, dict):
                raise HTTPError(400)
            body = data.get("csp-report")
            if not isinstance(body, dict):
                raise HTTPError(400)
            # rename legacy kebab-case keys to the camelCase names
            # used by the modern Reporting API
            for camel, kebab in (
                ("blockedURL", "blocked-uri"),
                ("documentURL", "document-uri"),
                ("effectiveDirective", "effective-directive"),
                ("originalPolicy", "original-policy"),
                ("sample", "script-sample"),
                ("statusCode", "status-code"),
                ("violatedDirective", "violated-directive"),
            ):
                if kebab in body:
                    body[camel] = body.pop(kebab)  # 🥙 → 🐪
            # wrap the CSP body in a Reporting-API-shaped report
            report = {
                "age": 0,
                "body": body,
                "type": "csp-violation",
                "url": body.get("documentURL"),
                "user_agent": self.request.headers.get("User-Agent"),
            }
            reports = [report]
        else:
            raise HTTPError(415)
        if not isinstance(reports, list):
            raise HTTPError(400)
        if len(reports) > self.MAX_REPORTS_PER_REQUEST:
            LOGGER.warning(
                "%s > MAX_REPORTS_PER_REQUEST (%s)",
                len(reports),
                self.MAX_REPORTS_PER_REQUEST,
            )
            raise HTTPError(400)
        # respond with 202 Accepted immediately; sanitizing and
        # indexing happen after the response has been finished
        self.set_status(202)
        self.finish()  # type: ignore[unused-awaitable]
        # sanitize in place; iterate over a copy because invalid
        # entries are removed from the list while iterating
        for report in reports.copy():
            if not isinstance(report, dict):
                reports.remove(report)
                continue
            # flatten a nested "_source" object into the report itself
            if isinstance((sauce := report.pop("_source", None)), dict):
                report.update(sauce)
            # drop reports missing any required field or with wrong types
            if not all(
                (
                    isinstance(report.get("age"), int),
                    isinstance(report.get("body"), dict),
                    isinstance(report.get("type"), str),
                    isinstance(report.get("url"), str),
                    isinstance(report.get("user_agent"), str),
                )
            ):
                reports.remove(report)
                continue
            # turn the relative "age" (milliseconds) into an absolute
            # @timestamp; negative ages are clamped to 0
            report["@timestamp"] = self.now - timedelta(
                milliseconds=max(0, cast(int, report.pop("age")))
            )
            report["ecs"] = {"version": "8.17.0"}
            report["_op_type"] = "create"
            # NOTE(review): presumably strips a client-supplied target
            # index so it can't override the one passed to async_bulk
            report.pop("_index", None)  # DO NOT REMOVE
        await async_bulk(
            self.elasticsearch,
            reports,
            index=f"{self.elasticsearch_prefix}-reports",
        )