Coverage for an_website/search/search.py: 73.016%
63 statements
« prev ^ index » next — coverage.py v7.8.2, created at 2025-06-01 08:32 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
14"""The search page used to search the website."""
16from __future__ import annotations
18import asyncio
19import logging
20from typing import Any, Final, Literal, TypeAlias, cast
22import orjson as json
23from typed_stream import Stream
25from .. import NAME
26from ..quotes.utils import (
27 Author,
28 Quote,
29 WrongQuote,
30 get_authors,
31 get_quotes,
32 get_wrong_quotes,
33)
34from ..soundboard.data import ALL_SOUNDS, SoundInfo
35from ..utils import search
36from ..utils.decorators import get_setting_or_default, requires_settings
37from ..utils.request_handler import APIRequestHandler, HTMLRequestHandler
38from ..utils.utils import AwaitableValue, ModuleInfo, PageInfo
# Module-level logger, named after this module.
LOGGER: Final = logging.getLogger(__name__)

# A search result before scoring: an ordered triple of
# ("url", …), ("title", …) and ("description", …) key/value pairs,
# ready to be turned into a dict.
UnscoredPageInfo: TypeAlias = tuple[
    tuple[Literal["url"], str],
    tuple[Literal["title"], str],
    tuple[Literal["description"], str],
]
# An UnscoredPageInfo paired with its relevance score.
OldSearchPageInfo: TypeAlias = search.ScoredValue[UnscoredPageInfo]
def get_module_info() -> ModuleInfo:
    """Create and return the ModuleInfo for this module."""
    # Both the HTML page and the JSON API are registered here.
    handlers = (
        (r"/suche", Search),
        (r"/api/suche", SearchAPIHandler),
    )
    return ModuleInfo(
        handlers=handlers,
        name="Suche",
        description="Seite zum Durchsuchen der Webseite",
        aliases=("/search",),
        keywords=("Suche",),
        path="/suche",
    )
class Search(HTMLRequestHandler):
    """The request handler for the search page."""

    def convert_page_info_to_simple_tuple(
        self, page_info: PageInfo
    ) -> UnscoredPageInfo:
        """Convert a PageInfo into an UnscoredPageInfo tuple of tuples."""
        return (
            ("url", self.fix_url(page_info.path)),
            ("title", page_info.name),
            ("description", page_info.description),
        )

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search page."""
        if head:
            return
        await self.render(
            "pages/search.html",
            query=self.get_query(),
            results=await self.search(),
        )

    def get_all_page_info(self) -> Stream[PageInfo]:
        """Return all page infos that can be found."""
        # Every module info counts as a page itself, alongside its sub-pages;
        # hidden pages and pages without a path are not searchable.
        return (
            Stream(self.get_module_infos())
            .flat_map(lambda module_info: module_info.sub_pages + (module_info,))
            .exclude(lambda page: page.hidden)
            .filter(lambda page: page.path)
        )

    def get_query(self) -> str:
        """Return the search query from the ?q= argument."""
        return str(self.get_argument("q", ""))

    async def search(self) -> list[dict[str, float | str]]:
        """Search the website.

        Try Elastic App Search first and fall back to the old
        in-process search if it fails or is not configured.
        """
        query = self.get_query()
        new_results: list[dict[str, str | float]] | None = None
        if query:
            try:
                new_results = await self.search_new(query)
            except Exception:  # pylint: disable=broad-except
                # Best effort: log and report, then fall back below.
                LOGGER.exception("App Search request failed")
                if self.apm_client:
                    self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
        if new_results is not None:
            return new_results
        return self.search_old(query)

    @requires_settings("APP_SEARCH", return_=AwaitableValue(None))
    @get_setting_or_default("APP_SEARCH_ENGINE", NAME.removesuffix("-dev"))
    async def search_new(  # type: ignore[no-any-unimported]
        self,
        query: str,
        *,
        app_search: Any = ...,
        app_search_engine: str = ...,  # type: ignore[assignment]
    ) -> list[dict[str, str | float]] | None:
        """Search the website using Elastic App Search."""
        # Request snippets for title/description, the raw URL path,
        # and filter out quotes with a rating of at most 1.
        body = {
            "query": query,
            "filters": {
                "none": {
                    "quote_rating": {
                        "to": 1,
                    },
                },
            },
            "result_fields": {
                "title": {
                    "snippet": {
                        "size": 50,
                        "fallback": True,
                    }
                },
                "meta_description": {
                    "snippet": {
                        "size": 200,
                        "fallback": True,
                    }
                },
                "url_path": {
                    "raw": {},
                },
            },
        }
        # The App Search client is blocking, so run it in a thread.
        response = await asyncio.to_thread(
            app_search.search, app_search_engine, body=body
        )
        results: list[dict[str, str | float]] = []
        for hit in response["results"]:
            results.append(
                {
                    "url": self.fix_url(hit["url_path"]["raw"]),
                    "title": hit["title"]["snippet"],
                    "description": hit["meta_description"]["snippet"],
                    "score": hit["_meta"]["score"],
                }
            )
        return results

    def search_old(
        self, query: str, limit: int = 20
    ) -> list[dict[str, str | float]]:
        """Search the website using the old search engine.

        Return at most ``limit`` results, best scores first.
        """
        scored = sorted(self.search_old_internal(query), reverse=True)
        return [
            dict((*scored_value.value, ("score", scored_value.score)))
            for scored_value in scored[:limit]
        ]

    def search_old_internal(self, query: str) -> list[OldSearchPageInfo]:
        """Search pages, sounds, authors and quotes."""
        query_object = search.Query(query)
        if not query_object:
            # An empty query matches every page with a neutral score of 1.
            return [
                search.ScoredValue(1, unscored)
                for unscored in self.get_all_page_info().map(
                    self.convert_page_info_to_simple_tuple
                )
            ]
        # Each DataProvider pairs a data source with the attributes to
        # match against and a converter to the common result tuple shape.
        pages: search.DataProvider[PageInfo, UnscoredPageInfo] = (
            search.DataProvider(
                self.get_all_page_info,
                lambda page: (
                    page.name,
                    page.description,
                    *page.keywords,
                ),
                self.convert_page_info_to_simple_tuple,
            )
        )
        soundboard: search.DataProvider[SoundInfo, UnscoredPageInfo] = (
            search.DataProvider(
                ALL_SOUNDS,
                lambda sound: (
                    sound.text,
                    sound.person.value,
                ),
                lambda sound: (
                    (
                        "url",
                        self.fix_url(
                            f"/soundboard/{sound.person.name}#{sound.filename}"
                        ),
                    ),
                    ("title", f"Soundboard ({sound.person.value})"),
                    ("description", sound.text),
                ),
            )
        )
        authors: search.DataProvider[Author, UnscoredPageInfo] = (
            search.DataProvider(
                get_authors,
                lambda author: author.name,
                lambda author: (
                    ("url", self.fix_url(author.get_path())),
                    ("title", "Autoren-Info"),
                    ("description", author.name),
                ),
            )
        )
        quotes: search.DataProvider[Quote, UnscoredPageInfo] = (
            search.DataProvider(
                get_quotes,
                lambda quote: (quote.quote, quote.author.name),
                lambda quote: (
                    ("url", self.fix_url(quote.get_path())),
                    ("title", "Zitat-Info"),
                    ("description", str(quote)),
                ),
            )
        )
        wrong_quotes: search.DataProvider[WrongQuote, UnscoredPageInfo] = (
            search.DataProvider(
                # Only positively rated wrong quotes are searchable.
                lambda: get_wrong_quotes(lambda wq: wq.rating > 0),
                lambda wq: (wq.quote.quote, wq.author.name),
                lambda wq: (
                    ("url", self.fix_url(wq.get_path())),
                    ("title", "Falsches Zitat"),
                    ("description", str(wq)),
                ),
            )
        )
        providers = (pages, soundboard, authors, quotes, wrong_quotes)
        return search.search(
            query_object,
            *(
                cast(search.DataProvider[object, UnscoredPageInfo], provider)
                for provider in providers
            ),
        )
class SearchAPIHandler(APIRequestHandler, Search):
    """The request handler for the search API."""

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search API."""
        if head:
            return
        # Reuse Search.search() and serialize the results as JSON.
        results = await self.search()
        await self.finish(json.dumps(results))