Coverage for an_website/search/search.py: 73.438% (64 statements) — coverage.py v7.6.4, created at 2024-11-16 19:56 +0000

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

13 

14"""The search page used to search the website.""" 

15 

16from __future__ import annotations 

17 

18import asyncio 

19import logging 

20from typing import Final, Literal, TypeAlias, cast 

21 

22import orjson as json 

23from elastic_enterprise_search import AppSearch # type: ignore[import-untyped] 

24from typed_stream import Stream 

25 

26from .. import NAME 

27from ..quotes.utils import ( 

28 Author, 

29 Quote, 

30 WrongQuote, 

31 get_authors, 

32 get_quotes, 

33 get_wrong_quotes, 

34) 

35from ..soundboard.data import ALL_SOUNDS, SoundInfo 

36from ..utils import search 

37from ..utils.decorators import get_setting_or_default, requires_settings 

38from ..utils.request_handler import APIRequestHandler, HTMLRequestHandler 

39from ..utils.utils import AwaitableValue, ModuleInfo, PageInfo 

40 

# Module-level logger named after this module.
LOGGER: Final = logging.getLogger(__name__)

# A search result without a score, as a tuple of (key, value) pairs:
# (("url", ...), ("title", ...), ("description", ...)).
UnscoredPageInfo: TypeAlias = tuple[
    tuple[Literal["url"], str],
    tuple[Literal["title"], str],
    tuple[Literal["description"], str],
]
# An UnscoredPageInfo together with its relevance score.
OldSearchPageInfo: TypeAlias = search.ScoredValue[UnscoredPageInfo]

49 

50 

def get_module_info() -> ModuleInfo:
    """Create and return the ModuleInfo for this module."""
    # HTML page and JSON API share the same search logic.
    handlers = (
        (r"/suche", Search),
        (r"/api/suche", SearchAPIHandler),
    )
    return ModuleInfo(
        handlers=handlers,
        name="Suche",
        description="Seite zum Durchsuchen der Webseite",
        aliases=("/search",),
        keywords=("Suche",),
        path="/suche",
    )

64 

65 

class Search(HTMLRequestHandler):
    """The request handler for the search page."""

    def convert_page_info_to_simple_tuple(
        self, page_info: PageInfo
    ) -> UnscoredPageInfo:
        """Convert a PageInfo into an UnscoredPageInfo tuple of pairs."""
        url = self.fix_url(page_info.path)
        return (
            ("url", url),
            ("title", page_info.name),
            ("description", page_info.description),
        )

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search page."""
        if head:
            return
        query = self.get_query()
        results = await self.search()
        await self.render(
            "pages/search.html",
            query=query,
            results=results,
        )

    def get_all_page_info(self) -> Stream[PageInfo]:
        """Return a stream of all visible page infos that have a path."""
        return (
            Stream(self.get_module_infos())
            # Each module contributes its sub-pages plus itself.
            .flat_map(lambda module_info: module_info.sub_pages + (module_info,))
            .exclude(lambda page_info: page_info.hidden)
            .filter(lambda page_info: page_info.path)
        )

    def get_query(self) -> str:
        """Return the search query from the ?q= request argument."""
        return str(self.get_argument("q", ""))

    async def search(self) -> list[dict[str, float | str]]:
        """Search the website.

        Try Elastic App Search first and fall back to the old search
        when App Search is unavailable, unconfigured, or fails.
        """
        query = self.get_query()
        if query:
            try:
                new_results = await self.search_new(query)
            except Exception:  # pylint: disable=broad-except
                # Log and report, then fall back to the old engine.
                LOGGER.exception("App Search request failed")
                if self.apm_client:
                    self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
            else:
                # None means App Search is not configured (see decorator).
                if new_results is not None:
                    return new_results
        return self.search_old(query)

    @requires_settings("APP_SEARCH", return_=AwaitableValue(None))
    @get_setting_or_default("APP_SEARCH_ENGINE", NAME.removesuffix("-dev"))
    async def search_new(  # type: ignore[no-any-unimported]
        self,
        query: str,
        *,
        app_search: AppSearch = ...,
        app_search_engine: str = ...,  # type: ignore[assignment]
    ) -> list[dict[str, str | float]] | None:
        """Search the website using Elastic App Search.

        The app_search client and engine name are injected by the
        decorators from the application settings.
        """
        # The App Search client is blocking, so run it in a thread.
        response = await asyncio.to_thread(
            app_search.search,
            app_search_engine,
            body={
                "query": query,
                # "none" filter excludes documents matching it; presumably
                # this drops badly rated quotes — confirm against the schema.
                "filters": {
                    "none": {
                        "quote_rating": {
                            "to": 1,
                        },
                    },
                },
                "result_fields": {
                    "title": {
                        "snippet": {
                            "size": 50,
                            "fallback": True,
                        }
                    },
                    "meta_description": {
                        "snippet": {
                            "size": 200,
                            "fallback": True,
                        }
                    },
                    "url_path": {
                        "raw": {},
                    },
                },
            },
        )
        return [
            {
                "url": self.fix_url(hit["url_path"]["raw"]),
                "title": hit["title"]["snippet"],
                "description": hit["meta_description"]["snippet"],
                "score": hit["_meta"]["score"],
            }
            for hit in response["results"]
        ]

    def search_old(
        self, query: str, limit: int = 20
    ) -> list[dict[str, str | float]]:
        """Search the website using the old search engine.

        Return at most limit results, best scores first.
        """
        scored_values = sorted(self.search_old_internal(query), reverse=True)
        return [
            dict(scored.value + (("score", scored.score),))
            for scored in scored_values[:limit]
        ]

    def search_old_internal(self, query: str) -> list[OldSearchPageInfo]:
        """Search pages, sounds, authors and quotes without App Search."""
        query_object = search.Query(query)
        if not query_object:
            # Empty query: return every page with a neutral score of 1.
            return [
                search.ScoredValue(1, unscored)
                for unscored in self.get_all_page_info().map(
                    self.convert_page_info_to_simple_tuple
                )
            ]
        # Static pages of the website.
        pages: search.DataProvider[PageInfo, UnscoredPageInfo] = (
            search.DataProvider(
                self.get_all_page_info,
                lambda page_info: (
                    page_info.name,
                    page_info.description,
                    *page_info.keywords,
                ),
                self.convert_page_info_to_simple_tuple,
            )
        )
        # Soundboard sounds, linked by person and filename anchor.
        soundboard: search.DataProvider[SoundInfo, UnscoredPageInfo] = (
            search.DataProvider(
                ALL_SOUNDS,
                lambda sound_info: (
                    sound_info.text,
                    sound_info.person.value,
                ),
                lambda sound_info: (
                    (
                        "url",
                        self.fix_url(
                            f"/soundboard/{sound_info.person.name}#{sound_info.filename}"
                        ),
                    ),
                    ("title", f"Soundboard ({sound_info.person.value})"),
                    ("description", sound_info.text),
                ),
            )
        )
        # Quote authors, searchable by name.
        authors: search.DataProvider[Author, UnscoredPageInfo] = (
            search.DataProvider(
                get_authors,
                lambda author: author.name,
                lambda author: (
                    ("url", self.fix_url(author.get_path())),
                    ("title", "Autoren-Info"),
                    ("description", author.name),
                ),
            )
        )
        # Quotes, searchable by text and author name.
        quotes: search.DataProvider[Quote, UnscoredPageInfo] = (
            search.DataProvider(
                get_quotes,
                lambda quote: (quote.quote, quote.author.name),
                lambda quote: (
                    ("url", self.fix_url(quote.get_path())),
                    ("title", "Zitat-Info"),
                    ("description", str(quote)),
                ),
            )
        )
        # Wrong quotes, restricted to positively rated ones.
        wrong_quotes: search.DataProvider[WrongQuote, UnscoredPageInfo] = (
            search.DataProvider(
                lambda: get_wrong_quotes(lambda wrong_quote: wrong_quote.rating > 0),
                lambda wrong_quote: (
                    wrong_quote.quote.quote,
                    wrong_quote.author.name,
                ),
                lambda wrong_quote: (
                    ("url", self.fix_url(wrong_quote.get_path())),
                    ("title", "Falsches Zitat"),
                    ("description", str(wrong_quote)),
                ),
            )
        )
        providers = (pages, soundboard, authors, quotes, wrong_quotes)
        return search.search(
            query_object,
            *(
                cast(search.DataProvider[object, UnscoredPageInfo], provider)
                for provider in providers
            ),
        )

260 

261 

class SearchAPIHandler(APIRequestHandler, Search):
    """The request handler for the search API."""

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests by returning the search results as JSON."""
        if head:
            return
        results = await self.search()
        await self.finish(json.dumps(results))