Coverage for an_website/search/search.py: 73.438%
64 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-16 19:56 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
14"""The search page used to search the website."""
16from __future__ import annotations
18import asyncio
19import logging
20from typing import Final, Literal, TypeAlias, cast
22import orjson as json
23from elastic_enterprise_search import AppSearch # type: ignore[import-untyped]
24from typed_stream import Stream
26from .. import NAME
27from ..quotes.utils import (
28 Author,
29 Quote,
30 WrongQuote,
31 get_authors,
32 get_quotes,
33 get_wrong_quotes,
34)
35from ..soundboard.data import ALL_SOUNDS, SoundInfo
36from ..utils import search
37from ..utils.decorators import get_setting_or_default, requires_settings
38from ..utils.request_handler import APIRequestHandler, HTMLRequestHandler
39from ..utils.utils import AwaitableValue, ModuleInfo, PageInfo
41LOGGER: Final = logging.getLogger(__name__)
43UnscoredPageInfo: TypeAlias = tuple[
44 tuple[Literal["url"], str],
45 tuple[Literal["title"], str],
46 tuple[Literal["description"], str],
47]
48OldSearchPageInfo: TypeAlias = search.ScoredValue[UnscoredPageInfo]
def get_module_info() -> ModuleInfo:
    """Build and return the ModuleInfo describing the search module."""
    # Both the HTML page and the JSON API are registered here.
    handlers = (
        (r"/suche", Search),
        (r"/api/suche", SearchAPIHandler),
    )
    return ModuleInfo(
        handlers=handlers,
        name="Suche",
        description="Seite zum Durchsuchen der Webseite",
        aliases=("/search",),
        keywords=("Suche",),
        path="/suche",
    )
class Search(HTMLRequestHandler):
    """The request handler for the search page."""

    def convert_page_info_to_simple_tuple(
        self, page_info: PageInfo
    ) -> UnscoredPageInfo:
        """Convert a PageInfo into the tuple-of-tuples result format.

        The returned pairs (url/title/description) are later turned into a
        dict by search_old.
        """
        return (
            ("url", self.fix_url(page_info.path)),
            ("title", page_info.name),
            ("description", page_info.description),
        )

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search page.

        For HEAD requests nothing is rendered; otherwise the search
        template is rendered with the query and its results.
        """
        if head:
            return
        await self.render(
            "pages/search.html",
            query=self.get_query(),
            results=await self.search(),
        )

    def get_all_page_info(self) -> Stream[PageInfo]:
        """Return a stream of all visible page infos of the website.

        Includes each module's sub-pages and the module itself, excluding
        hidden pages and pages without a path.
        """
        return (
            Stream(self.get_module_infos())
            .flat_map(lambda mi: mi.sub_pages + (mi,))
            .exclude(lambda pi: pi.hidden)
            .filter(lambda pi: pi.path)
        )

    def get_query(self) -> str:
        """Return the search query from the "q" request argument ("" if absent)."""
        return str(self.get_argument("q", ""))

    async def search(self) -> list[dict[str, float | str]]:
        """Search the website.

        Tries the Elastic App Search backend first; on any failure (logged
        and reported to APM) or when it is unavailable/returns None, falls
        back to the old in-process search. An empty query skips App Search
        entirely and goes straight to the fallback.
        """
        result: list[dict[str, str | float]] | None = None
        if query := self.get_query():
            try:
                result = await self.search_new(query)
            except Exception:  # pylint: disable=broad-except
                # Best-effort: never let a backend outage break the page.
                LOGGER.exception("App Search request failed")
                if self.apm_client:
                    self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
        if result is not None:
            return result
        # NOTE: query is bound by the walrus operator above even when falsy.
        return self.search_old(query)

    @requires_settings("APP_SEARCH", return_=AwaitableValue(None))
    @get_setting_or_default("APP_SEARCH_ENGINE", NAME.removesuffix("-dev"))
    async def search_new(  # type: ignore[no-any-unimported]
        self,
        query: str,
        *,
        # The Ellipsis defaults are placeholders; the decorators above
        # inject the real values from the application settings — TODO confirm.
        app_search: AppSearch = ...,
        app_search_engine: str = ...,  # type: ignore[assignment]
    ) -> list[dict[str, str | float]] | None:
        """Search the website using Elastic App Search.

        Returns a list of result dicts (url/title/description/score), or
        None when the APP_SEARCH setting is missing (via requires_settings).
        """
        return [
            {
                "url": self.fix_url(result["url_path"]["raw"]),
                "title": result["title"]["snippet"],
                "description": result["meta_description"]["snippet"],
                "score": result["_meta"]["score"],
            }
            for result in (
                # The client is synchronous, so run it in a thread to
                # avoid blocking the event loop.
                await asyncio.to_thread(
                    app_search.search,
                    app_search_engine,
                    body={
                        "query": query,
                        "filters": {
                            # Exclude documents with quote_rating <= 1.
                            "none": {
                                "quote_rating": {
                                    "to": 1,
                                },
                            },
                        },
                        "result_fields": {
                            "title": {
                                "snippet": {
                                    "size": 50,
                                    "fallback": True,
                                }
                            },
                            "meta_description": {
                                "snippet": {
                                    "size": 200,
                                    "fallback": True,
                                }
                            },
                            "url_path": {
                                "raw": {},
                            },
                        },
                    },
                )
            )["results"]
        ]

    def search_old(
        self, query: str, limit: int = 20
    ) -> list[dict[str, str | float]]:
        """Search the website using the old search engine.

        Scores all candidates, sorts best-first and returns at most
        *limit* results as dicts including their score.
        """
        page_infos = self.search_old_internal(query)
        page_infos.sort(reverse=True)
        return [
            dict(scored_value.value + (("score", scored_value.score),))
            for scored_value in page_infos[:limit]
        ]

    def search_old_internal(self, query: str) -> list[OldSearchPageInfo]:
        """Search pages, soundboard sounds, authors and quotes.

        A falsy (empty) query returns every page with score 1; otherwise
        all data providers are queried and scored against the parsed query.
        """
        if not (query_object := search.Query(query)):
            return list(
                self.get_all_page_info()
                .map(self.convert_page_info_to_simple_tuple)
                .map(lambda unscored: search.ScoredValue(1, unscored))
            )
        # Each DataProvider bundles: a source of values, the text fields
        # to match the query against, and a converter to the result tuple.
        pages: search.DataProvider[PageInfo, UnscoredPageInfo] = (
            search.DataProvider(
                self.get_all_page_info,
                lambda page_info: (
                    page_info.name,
                    page_info.description,
                    *page_info.keywords,
                ),
                self.convert_page_info_to_simple_tuple,
            )
        )
        soundboard: search.DataProvider[SoundInfo, UnscoredPageInfo] = (
            search.DataProvider(
                ALL_SOUNDS,
                lambda sound_info: (
                    sound_info.text,
                    sound_info.person.value,
                ),
                lambda sound_info: (
                    (
                        "url",
                        self.fix_url(
                            f"/soundboard/{sound_info.person.name}#{sound_info.filename}"
                        ),
                    ),
                    ("title", f"Soundboard ({sound_info.person.value})"),
                    ("description", sound_info.text),
                ),
            )
        )
        authors: search.DataProvider[Author, UnscoredPageInfo] = (
            search.DataProvider(
                get_authors,
                lambda author: author.name,
                lambda author: (
                    ("url", self.fix_url(author.get_path())),
                    ("title", "Autoren-Info"),
                    ("description", author.name),
                ),
            )
        )
        quotes: search.DataProvider[Quote, UnscoredPageInfo] = (
            search.DataProvider(
                get_quotes,
                lambda quote: (quote.quote, quote.author.name),
                lambda q: (
                    ("url", self.fix_url(q.get_path())),
                    ("title", "Zitat-Info"),
                    ("description", str(q)),
                ),
            )
        )
        wrong_quotes: search.DataProvider[WrongQuote, UnscoredPageInfo] = (
            search.DataProvider(
                # Only positively rated wrong quotes are searchable.
                lambda: get_wrong_quotes(lambda wq: wq.rating > 0),
                lambda wq: (wq.quote.quote, wq.author.name),
                lambda wq: (
                    ("url", self.fix_url(wq.get_path())),
                    ("title", "Falsches Zitat"),
                    ("description", str(wq)),
                ),
            )
        )
        return search.search(
            query_object,
            cast(search.DataProvider[object, UnscoredPageInfo], pages),
            cast(search.DataProvider[object, UnscoredPageInfo], soundboard),
            cast(search.DataProvider[object, UnscoredPageInfo], authors),
            cast(search.DataProvider[object, UnscoredPageInfo], quotes),
            cast(search.DataProvider[object, UnscoredPageInfo], wrong_quotes),
        )
class SearchAPIHandler(APIRequestHandler, Search):
    """The request handler for the search API."""

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search page.

        HEAD requests produce no body; otherwise the search results are
        serialized as JSON and written as the response.
        """
        if head:
            return
        results = await self.search()
        await self.finish(json.dumps(results))