Coverage for an_website/search/search.py: 72.581%
62 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-10 18:56 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
14"""The search page used to search the website."""
16import asyncio
17import logging
18from typing import Any, Final, Literal, TypeAlias, cast
20import orjson as json
21from typed_stream import Stream
23from .. import NAME
24from ..quotes.utils import (
25 Author,
26 Quote,
27 WrongQuote,
28 get_authors,
29 get_quotes,
30 get_wrong_quotes,
31)
32from ..soundboard.data import ALL_SOUNDS, SoundInfo
33from ..utils import search
34from ..utils.decorators import get_setting_or_default, requires_settings
35from ..utils.request_handler import APIRequestHandler, HTMLRequestHandler
36from ..utils.utils import AwaitableValue, ModuleInfo, PageInfo
# Module-level logger, named after this module.
LOGGER: Final = logging.getLogger(__name__)

# A search result without a score: a fixed-order tuple of (key, value)
# pairs, so that it can be passed to dict() once a score has been added.
UnscoredPageInfo: TypeAlias = tuple[
    tuple[Literal["url"], str],
    tuple[Literal["title"], str],
    tuple[Literal["description"], str],
]

# An UnscoredPageInfo wrapped in search.ScoredValue.
OldSearchPageInfo: TypeAlias = search.ScoredValue[UnscoredPageInfo]
def get_module_info() -> ModuleInfo:
    """Build the ModuleInfo describing the search page and its API."""
    # Route patterns paired with the request handlers that serve them.
    handlers = (
        (r"/suche", Search),
        (r"/api/suche", SearchAPIHandler),
    )
    return ModuleInfo(
        handlers=handlers,
        name="Suche",
        description="Seite zum Durchsuchen der Webseite",
        aliases=("/search",),
        keywords=("Suche",),
        path="/suche",
    )
class Search(HTMLRequestHandler):
    """Request handler that renders the HTML search page."""

    def convert_page_info_to_simple_tuple(
        self, page_info: PageInfo
    ) -> UnscoredPageInfo:
        """Turn a PageInfo into url/title/description key-value pairs."""
        url = self.fix_url(page_info.path)
        return (
            ("url", url),
            ("title", page_info.name),
            ("description", page_info.description),
        )

    async def get(self, *, head: bool = False) -> None:
        """Render the search page; nothing is rendered when head is set."""
        if not head:
            await self.render(
                "pages/search.html",
                query=self.get_query(),
                results=await self.search(),
            )

    def get_all_page_info(self) -> Stream[PageInfo]:
        """Stream every module and sub-page that is visible and has a path."""
        module_infos = Stream(self.get_module_infos())
        # Sub-pages come first, followed by the module itself.
        page_infos = module_infos.flat_map(
            lambda module_info: module_info.sub_pages + (module_info,)
        )
        visible = page_infos.exclude(lambda page_info: page_info.hidden)
        return visible.filter(lambda page_info: page_info.path)

    def get_query(self) -> str:
        """Return the "q" request argument, or "" when it is absent."""
        query = self.get_argument("q", "")
        return str(query)

    async def search(self) -> list[dict[str, float | str]]:
        """Search the website.

        search_new is tried first for a non-empty query. If it raises or
        returns None, search_old is used instead.
        """
        query = self.get_query()
        if query:
            try:
                result = await self.search_new(query)
            except Exception:  # pylint: disable=broad-except
                # Best effort: report the failure and fall back below.
                LOGGER.exception("App Search request failed")
                if self.apm_client:
                    self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
            else:
                if result is not None:
                    return result
        return self.search_old(query)

    @requires_settings("APP_SEARCH", return_=AwaitableValue(None))
    @get_setting_or_default("APP_SEARCH_ENGINE", NAME.removesuffix("-dev"))
    async def search_new(
        self,
        query: str,
        *,
        app_search: Any = ...,
        app_search_engine: str = ...,  # type: ignore[assignment]
    ) -> list[dict[str, str | float]] | None:
        """Search the website using Elastic App Search.

        The blocking client call runs in a worker thread via
        asyncio.to_thread.
        """
        result_fields: dict[str, Any] = {
            "title": {"snippet": {"size": 50, "fallback": True}},
            "meta_description": {"snippet": {"size": 200, "fallback": True}},
            "url_path": {"raw": {}},
        }
        body: dict[str, Any] = {
            "query": query,
            "filters": {"none": {"quote_rating": {"to": 1}}},
            "result_fields": result_fields,
        }
        response = await asyncio.to_thread(
            app_search.search,
            app_search_engine,
            body=body,
        )
        return [
            {
                "url": self.fix_url(hit["url_path"]["raw"]),
                "title": hit["title"]["snippet"],
                "description": hit["meta_description"]["snippet"],
                "score": hit["_meta"]["score"],
            }
            for hit in response["results"]
        ]

    def search_old(
        self, query: str, limit: int = 20
    ) -> list[dict[str, str | float]]:
        """Search the website using the old search engine.

        Results are sorted in descending order and cut off after limit
        entries. Each one becomes a dict with an added "score" key.
        """
        ranked = sorted(self.search_old_internal(query), reverse=True)

        results: list[dict[str, str | float]] = []
        for scored in ranked[:limit]:
            pairs = scored.value + (("score", scored.score),)
            results.append(dict(pairs))
        return results

    def search_old_internal(self, query: str) -> list[OldSearchPageInfo]:
        """Search pages, sounds, authors, quotes and wrong quotes.

        When the parsed query object is falsy, every page from
        get_all_page_info is returned, each wrapped as ScoredValue(1, ...).
        """
        query_object = search.Query(query)
        if not query_object:
            return [
                search.ScoredValue(
                    1, self.convert_page_info_to_simple_tuple(page_info)
                )
                for page_info in self.get_all_page_info()
            ]

        def sound_to_result(sound_info: SoundInfo) -> UnscoredPageInfo:
            """Describe a sound as a link into its person's soundboard."""
            path = f"/soundboard/{sound_info.person.name}#{sound_info.filename}"
            return (
                ("url", self.fix_url(path)),
                ("title", f"Soundboard ({sound_info.person.value})"),
                ("description", sound_info.text),
            )

        def author_to_result(author: Author) -> UnscoredPageInfo:
            """Describe an author as a link to the author's info page."""
            return (
                ("url", self.fix_url(author.get_path())),
                ("title", "Autoren-Info"),
                ("description", author.name),
            )

        def quote_to_result(quote: Quote) -> UnscoredPageInfo:
            """Describe a quote as a link to the quote's info page."""
            return (
                ("url", self.fix_url(quote.get_path())),
                ("title", "Zitat-Info"),
                ("description", str(quote)),
            )

        def wrong_quote_to_result(wrong_quote: WrongQuote) -> UnscoredPageInfo:
            """Describe a wrong quote as a link to its page."""
            return (
                ("url", self.fix_url(wrong_quote.get_path())),
                ("title", "Falsches Zitat"),
                ("description", str(wrong_quote)),
            )

        pages: search.DataProvider[PageInfo, UnscoredPageInfo] = (
            search.DataProvider(
                self.get_all_page_info,
                lambda info: (info.name, info.description, *info.keywords),
                self.convert_page_info_to_simple_tuple,
            )
        )
        soundboard: search.DataProvider[SoundInfo, UnscoredPageInfo] = (
            search.DataProvider(
                ALL_SOUNDS,
                lambda sound: (sound.text, sound.person.value),
                sound_to_result,
            )
        )
        authors: search.DataProvider[Author, UnscoredPageInfo] = (
            search.DataProvider(
                get_authors,
                lambda author: author.name,
                author_to_result,
            )
        )
        quotes: search.DataProvider[Quote, UnscoredPageInfo] = (
            search.DataProvider(
                get_quotes,
                lambda quote: (quote.quote, quote.author.name),
                quote_to_result,
            )
        )
        wrong_quotes: search.DataProvider[WrongQuote, UnscoredPageInfo] = (
            search.DataProvider(
                # Only wrong quotes with a rating above zero are searched.
                lambda: get_wrong_quotes(lambda wq: wq.rating > 0),
                lambda wq: (wq.quote.quote, wq.author.name),
                wrong_quote_to_result,
            )
        )

        # The casts only widen the providers' first type parameter to object.
        providers = (
            cast(search.DataProvider[object, UnscoredPageInfo], pages),
            cast(search.DataProvider[object, UnscoredPageInfo], soundboard),
            cast(search.DataProvider[object, UnscoredPageInfo], authors),
            cast(search.DataProvider[object, UnscoredPageInfo], quotes),
            cast(search.DataProvider[object, UnscoredPageInfo], wrong_quotes),
        )
        return search.search(query_object, *providers)
class SearchAPIHandler(APIRequestHandler, Search):
    """Request handler that serves the search results as JSON."""

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search API.

        Nothing is written when head is set.
        """
        if not head:
            results = await self.search()
            await self.finish(json.dumps(results))