Coverage for an_website / search / search.py: 72.581%
62 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-22 18:49 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
14"""The search page used to search the website."""
17import asyncio
18import logging
19from typing import Any, Final, Literal, TypeAlias, cast
21import orjson as json
22from typed_stream import Stream
24from .. import NAME
25from ..quotes.utils import (
26 Author,
27 Quote,
28 WrongQuote,
29 get_authors,
30 get_quotes,
31 get_wrong_quotes,
32)
33from ..soundboard.data import ALL_SOUNDS, SoundInfo
34from ..utils import search
35from ..utils.decorators import get_setting_or_default, requires_settings
36from ..utils.request_handler import APIRequestHandler, HTMLRequestHandler
37from ..utils.utils import AwaitableValue, ModuleInfo, PageInfo
# Module-level logger for this search module.
LOGGER: Final = logging.getLogger(__name__)
# Unscored search hit: a fixed-order tuple of ("url", …), ("title", …),
# ("description", …) pairs, convertible to a dict via dict(...).
UnscoredPageInfo: TypeAlias = tuple[
    tuple[Literal["url"], str],
    tuple[Literal["title"], str],
    tuple[Literal["description"], str],
]
# An unscored page-info tuple paired with its relevance score.
OldSearchPageInfo: TypeAlias = search.ScoredValue[UnscoredPageInfo]
def get_module_info() -> ModuleInfo:
    """Create and return the ModuleInfo for this module."""
    # HTML page and JSON API share the same search implementation.
    handlers = (
        (r"/suche", Search),
        (r"/api/suche", SearchAPIHandler),
    )
    return ModuleInfo(
        handlers=handlers,
        name="Suche",
        description="Seite zum Durchsuchen der Webseite",
        aliases=("/search",),
        keywords=("Suche",),
        path="/suche",
    )
class Search(HTMLRequestHandler):
    """The request handler for the search page."""

    def convert_page_info_to_simple_tuple(
        self, page_info: PageInfo
    ) -> UnscoredPageInfo:
        """Convert a PageInfo into the unscored tuple-of-tuples form."""
        url = self.fix_url(page_info.path)
        return (
            ("url", url),
            ("title", page_info.name),
            ("description", page_info.description),
        )

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search page."""
        if head:
            return
        query = self.get_query()
        results = await self.search()
        await self.render("pages/search.html", query=query, results=results)

    def get_all_page_info(self) -> Stream[PageInfo]:
        """Stream every searchable page info, including sub-pages."""
        modules = Stream(self.get_module_infos())
        # Each module contributes its sub-pages plus itself.
        candidates = modules.flat_map(lambda info: info.sub_pages + (info,))
        # Drop hidden pages and pages without a path.
        return candidates.exclude(lambda info: info.hidden).filter(
            lambda info: info.path
        )

    def get_query(self) -> str:
        """Return the current search query string."""
        query = self.get_argument("q", "")
        return str(query)

    async def search(self) -> list[dict[str, float | str]]:
        """Search the website.

        Try the new App Search backend first; on failure or when it
        yields nothing, fall back to the old search engine.
        """
        query = self.get_query()
        if query:
            try:
                new_results = await self.search_new(query)
            except Exception:  # pylint: disable=broad-except
                # Best-effort: log, report to APM, then fall back.
                LOGGER.exception("App Search request failed")
                if self.apm_client:
                    self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
            else:
                if new_results is not None:
                    return new_results
        return self.search_old(query)

    @requires_settings("APP_SEARCH", return_=AwaitableValue(None))
    @get_setting_or_default("APP_SEARCH_ENGINE", NAME.removesuffix("-dev"))
    async def search_new(
        self,
        query: str,
        *,
        app_search: Any = ...,
        app_search_engine: str = ...,  # type: ignore[assignment]
    ) -> list[dict[str, str | float]] | None:
        """Search the website using Elastic App Search."""
        request_body = {
            "query": query,
            # "none" filter: exclude documents matching this range
            # (presumably low-rated quotes — confirm against the engine schema)
            "filters": {
                "none": {
                    "quote_rating": {
                        "to": 1,
                    },
                },
            },
            "result_fields": {
                "title": {
                    "snippet": {
                        "size": 50,
                        "fallback": True,
                    }
                },
                "meta_description": {
                    "snippet": {
                        "size": 200,
                        "fallback": True,
                    }
                },
                "url_path": {
                    "raw": {},
                },
            },
        }
        # The client is synchronous; run it off the event loop.
        response = await asyncio.to_thread(
            app_search.search, app_search_engine, body=request_body
        )
        return [
            {
                "url": self.fix_url(hit["url_path"]["raw"]),
                "title": hit["title"]["snippet"],
                "description": hit["meta_description"]["snippet"],
                "score": hit["_meta"]["score"],
            }
            for hit in response["results"]
        ]

    def search_old(
        self, query: str, limit: int = 20
    ) -> list[dict[str, str | float]]:
        """Search the website using the old search engine."""
        scored_pages = self.search_old_internal(query)
        scored_pages.sort(reverse=True)  # best score first
        results: list[dict[str, str | float]] = []
        for scored in scored_pages[:limit]:
            results.append(dict(scored.value + (("score", scored.score),)))
        return results

    def search_old_internal(self, query: str) -> list[OldSearchPageInfo]:
        """Search pages, sounds, authors and quotes."""
        query_object = search.Query(query)
        if not query_object:
            # Blank query: every page matches with score 1.
            return [
                search.ScoredValue(
                    1, self.convert_page_info_to_simple_tuple(page_info)
                )
                for page_info in self.get_all_page_info()
            ]
        pages: search.DataProvider[PageInfo, UnscoredPageInfo] = (
            search.DataProvider(
                self.get_all_page_info,
                lambda pi: (pi.name, pi.description, *pi.keywords),
                self.convert_page_info_to_simple_tuple,
            )
        )
        soundboard: search.DataProvider[SoundInfo, UnscoredPageInfo] = (
            search.DataProvider(
                ALL_SOUNDS,
                lambda si: (si.text, si.person.value),
                lambda si: (
                    (
                        "url",
                        self.fix_url(
                            f"/soundboard/{si.person.name}#{si.filename}"
                        ),
                    ),
                    ("title", f"Soundboard ({si.person.value})"),
                    ("description", si.text),
                ),
            )
        )
        authors: search.DataProvider[Author, UnscoredPageInfo] = (
            search.DataProvider(
                get_authors,
                lambda author: author.name,
                lambda author: (
                    ("url", self.fix_url(author.get_path())),
                    ("title", "Autoren-Info"),
                    ("description", author.name),
                ),
            )
        )
        quotes: search.DataProvider[Quote, UnscoredPageInfo] = (
            search.DataProvider(
                get_quotes,
                lambda quote: (quote.quote, quote.author.name),
                lambda quote: (
                    ("url", self.fix_url(quote.get_path())),
                    ("title", "Zitat-Info"),
                    ("description", str(quote)),
                ),
            )
        )
        wrong_quotes: search.DataProvider[WrongQuote, UnscoredPageInfo] = (
            search.DataProvider(
                # Only positively rated wrong quotes are searchable.
                lambda: get_wrong_quotes(lambda wq: wq.rating > 0),
                lambda wq: (wq.quote.quote, wq.author.name),
                lambda wq: (
                    ("url", self.fix_url(wq.get_path())),
                    ("title", "Falsches Zitat"),
                    ("description", str(wq)),
                ),
            )
        )
        providers = (pages, soundboard, authors, quotes, wrong_quotes)
        return search.search(
            query_object,
            *(
                cast(search.DataProvider[object, UnscoredPageInfo], provider)
                for provider in providers
            ),
        )
class SearchAPIHandler(APIRequestHandler, Search):
    """The request handler for the search API."""

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search API and reply with JSON."""
        if head:
            return
        results = await self.search()
        await self.finish(json.dumps(results))