Coverage for an_website / reporting / reporting.py: 30.337%

89 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-24 17:35 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13 

14"""The Reporting API™️ of the website.""" 

15 

16import logging 

17from datetime import timedelta 

18from typing import Any, ClassVar, Final, cast 

19 

20import orjson as json 

21from elasticsearch import AsyncElasticsearch 

22from elasticsearch.exceptions import NotFoundError 

23from elasticsearch.helpers import async_bulk 

24from tornado.web import HTTPError 

25 

26from .. import EVENT_ELASTICSEARCH, ORJSON_OPTIONS 

27from ..utils.request_handler import APIRequestHandler 

28from ..utils.utils import ModuleInfo, Permission 

29 

30LOGGER: Final = logging.getLogger(__name__) 

31 

32 

def get_module_info() -> ModuleInfo:
    """Build the ModuleInfo that describes the Reporting API™️ module.

    The module exposes a single handler (``ReportingAPI``) at
    ``/api/reports`` and is registered as hidden.
    """
    api_path = "/api/reports"
    # User-facing (German) description; it is emitted at runtime and
    # therefore has to stay exactly as written.
    description = (
        "Die Reporting API™️ kann zur Überwachung von "
        "Sicherheits-Verstößen, veralteten API-Aufrufen und mehr "
        "von Seiten des Asozialen Netzwerks genutzt werden.\n"
        "Bei Interesse kontakten Sie bitte das Gürteltier."
    )
    handlers = ((api_path, ReportingAPI),)
    return ModuleInfo(
        handlers=handlers,
        name="Reporting API™️",
        description=description,
        path=api_path,
        hidden=True,
    )

47 

48 

async def get_reports(  # pylint: disable=too-many-arguments
    elasticsearch: AsyncElasticsearch,
    prefix: str,
    domain: None | str = None,
    type_: None | str = None,
    from_: int = 0,
    size: int = 10,
) -> list[dict[str, Any]]:
    """Fetch stored reports from Elasticsearch, newest first.

    The search runs against the ``{prefix}-reports`` index and only
    matches documents whose ``@timestamp`` is ``>= now-1M``.  Two kinds
    of reports are always excluded: ``network-error`` reports whose
    ``body.type`` is ``abandoned`` and ``csp-violation`` reports whose
    ``body.source-file`` is ``moz-extension``.

    Args:
        elasticsearch: client whose ``search`` coroutine is awaited.
        prefix: index name prefix.
        domain: optional simple-query-string matched against
            ``url.domain``; ignored when falsy.
        type_: optional simple-query-string matched against ``type``;
            ignored when falsy.
        from_: offset passed through to the search.
        size: maximum number of hits passed through to the search.

    Returns:
        The ``_source`` of every hit, in the order returned by the search.
    """
    query_string_flags = "AND|ESCAPE|NOT|OR|PHRASE|PRECEDENCE|WHITESPACE"

    # Mandatory time window first, then the optional user filters in a
    # fixed order (domain before type).
    filters: list[dict[str, dict[str, Any]]] = [
        {"range": {"@timestamp": {"gte": "now-1M"}}}
    ]
    for field, needle in (("url.domain", domain), ("type", type_)):
        if not needle:
            continue
        filters.append(
            {
                "simple_query_string": {
                    "query": needle,
                    "fields": [field],
                    "flags": query_string_flags,
                }
            }
        )

    # Each exclusion only applies when BOTH terms match the same report.
    exclusions = [
        {
            "bool": {
                "filter": [
                    {"term": {"type": {"value": report_type}}},
                    {"term": {body_field: {"value": body_value}}},
                ]
            }
        }
        for report_type, body_field, body_value in (
            ("network-error", "body.type", "abandoned"),
            ("csp-violation", "body.source-file", "moz-extension"),
        )
    ]

    response = await elasticsearch.search(
        index=f"{prefix}-reports",
        sort=[{"@timestamp": {"order": "desc"}}],
        query={"bool": {"filter": filters, "must_not": exclusions}},
        from_=from_,
        size=size,
    )
    return [hit["_source"] for hit in response["hits"]["hits"]]

106 

107 

class ReportingAPI(APIRequestHandler):
    """The request handler for the Reporting API™️.

    GET returns stored reports (as JSON or newline-delimited JSON);
    POST accepts reports sent as ``application/reports+json`` or
    ``application/csp-report`` and writes them to Elasticsearch.
    """

    # Everything the base handler can emit, plus newline-delimited JSON.
    POSSIBLE_CONTENT_TYPES: ClassVar[tuple[str, ...]] = (
        APIRequestHandler.POSSIBLE_CONTENT_TYPES + ("application/x-ndjson",)
    )

    # NOTE(review): rate-limit knobs, presumably consumed by the base
    # request handler — confirm the exact semantics there.
    RATELIMIT_GET_LIMIT: ClassVar[int] = 20
    RATELIMIT_GET_COUNT_PER_PERIOD: ClassVar[int] = 2

    RATELIMIT_POST_LIMIT: ClassVar[int] = 20
    RATELIMIT_POST_COUNT_PER_PERIOD: ClassVar[int] = 2

    # NOTE(review): presumably the maximum accepted request body size in
    # bytes, enforced outside this class — confirm.
    MAX_BODY_SIZE: ClassVar[int] = 100_000_000

    # Upper bound on the number of reports accepted in one POST request.
    MAX_REPORTS_PER_REQUEST: ClassVar[int] = 1000

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the Reporting API™️.

        Query arguments: ``domain`` and ``type`` (optional filters),
        ``from`` (offset, default 0) and ``size`` (default 10).

        Args:
            head: when true, return right after the availability check
                without querying or writing a body.

        Raises:
            HTTPError: 503 if Elasticsearch is not available, 404 if the
                reports data stream doesn't exist.
        """
        if not EVENT_ELASTICSEARCH.is_set():
            raise HTTPError(503)

        if head:
            return

        domain = self.get_argument("domain", None)
        type_ = self.get_argument("type", None)
        from_ = self.get_int_argument("from", 0, min_=0)
        size = self.get_int_argument("size", 10, min_=0)

        if not self.is_authorized(Permission.REPORTING):
            # Clients without the reporting permission can neither page
            # through the data nor fetch more than 1000 reports at once.
            from_ = 0
            size = min(1000, size)

        try:
            reports = await get_reports(
                self.elasticsearch,
                self.elasticsearch_prefix,
                domain,
                type_,
                from_,
                size,
            )
        except NotFoundError:  # data stream doesn't exist
            raise HTTPError(404) from None

        if self.content_type == "application/x-ndjson":
            await self.finish(
                b"\n".join(
                    json.dumps(report, option=ORJSON_OPTIONS)
                    for report in reports
                )
            )
        else:
            await self.finish(self.dump(reports))

    async def post(self) -> None:
        """Handle POST requests to the Reporting API™️.

        The body is either a JSON list of reports
        (``application/reports+json``) or a single legacy CSP report
        (``application/csp-report``), which is converted to the same
        shape.  The request is answered with 202 before the reports are
        validated and written, so invalid reports are dropped silently.

        Raises:
            HTTPError: 503 if built-in reporting is disabled or
                Elasticsearch is not available, 415 for any other
                Content-Type, 400 if the body is not valid JSON, has the
                wrong shape or contains too many reports.
        """
        # pylint: disable=too-complex, too-many-branches
        if not (
            self.settings.get("REPORTING_BUILTIN")
            and EVENT_ELASTICSEARCH.is_set()
        ):
            raise HTTPError(503)

        content_type = self.request.headers.get("Content-Type", "")
        is_report_list = content_type.startswith("application/reports+json")
        is_csp_report = content_type.startswith("application/csp-report")
        if not (is_report_list or is_csp_report):
            raise HTTPError(415)

        try:
            payload = json.loads(self.request.body)
        except json.JSONDecodeError:
            # A malformed body is a client error, not a server error.
            raise HTTPError(400) from None

        if is_report_list:
            reports = payload
        else:
            if not isinstance(payload, dict):
                raise HTTPError(400)
            body = payload.get("csp-report")
            if not isinstance(body, dict):
                raise HTTPError(400)
            # Rename the legacy kebab-case keys to the camelCase names
            # used by Reporting API CSP violation bodies.
            for camel, kebab in (
                ("blockedURL", "blocked-uri"),
                ("documentURL", "document-uri"),
                ("effectiveDirective", "effective-directive"),
                ("originalPolicy", "original-policy"),
                ("sample", "script-sample"),
                ("statusCode", "status-code"),
                ("violatedDirective", "violated-directive"),
            ):
                if kebab in body:
                    body[camel] = body.pop(kebab)  # 🥙 → 🐪
            reports = [
                {
                    "age": 0,
                    "body": body,
                    "type": "csp-violation",
                    "url": body.get("documentURL"),
                    "user_agent": self.request.headers.get("User-Agent"),
                }
            ]

        if not isinstance(reports, list):
            raise HTTPError(400)
        if len(reports) > self.MAX_REPORTS_PER_REQUEST:
            LOGGER.warning(
                "%s > MAX_REPORTS_PER_REQUEST (%s)",
                len(reports),
                self.MAX_REPORTS_PER_REQUEST,
            )
            raise HTTPError(400)

        # Answer first; validation and indexing happen afterwards.
        self.set_status(202)
        self.finish()  # type: ignore[unused-awaitable]

        # Collect the valid reports in a new list instead of removing
        # the invalid ones from the list that is being walked.
        documents: list[dict[str, Any]] = []
        for report in reports:
            if not isinstance(report, dict):
                continue
            # Reports may arrive wrapped the way GET returns search hits;
            # merge the wrapped document into the top level.
            if isinstance((sauce := report.pop("_source", None)), dict):
                report.update(sauce)
            if not all(
                (
                    isinstance(report.get("age"), int),
                    isinstance(report.get("body"), dict),
                    isinstance(report.get("type"), str),
                    isinstance(report.get("url"), str),
                    isinstance(report.get("user_agent"), str),
                )
            ):
                continue
            try:
                # "age" is the number of milliseconds since the report
                # was generated; negative values are treated as 0.
                timestamp = self.now - timedelta(
                    milliseconds=max(0, cast(int, report.pop("age")))
                )
            except OverflowError:
                # An absurdly large age must only cost this one report,
                # not the whole batch.
                continue
            report["@timestamp"] = timestamp
            report["ecs"] = {"version": "8.17.0"}
            report["_op_type"] = "create"
            # Never let the client choose the target index.
            report.pop("_index", None)  # DO NOT REMOVE
            documents.append(report)

        await async_bulk(
            self.elasticsearch,
            documents,
            index=f"{self.elasticsearch_prefix}-reports",
        )

244 )