Coverage for an_website/search/search.py: 73.016%

63 statements  

coverage.py v7.8.2, created at 2025-06-01 08:32 +0000

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""The search page used to search the website."""

from __future__ import annotations

import asyncio
import logging
from typing import Any, Final, Literal, TypeAlias, cast

import orjson as json
from typed_stream import Stream

from .. import NAME
from ..quotes.utils import (
    Author,
    Quote,
    WrongQuote,
    get_authors,
    get_quotes,
    get_wrong_quotes,
)
from ..soundboard.data import ALL_SOUNDS, SoundInfo
from ..utils import search
from ..utils.decorators import get_setting_or_default, requires_settings
from ..utils.request_handler import APIRequestHandler, HTMLRequestHandler
from ..utils.utils import AwaitableValue, ModuleInfo, PageInfo

LOGGER: Final = logging.getLogger(__name__)

UnscoredPageInfo: TypeAlias = tuple[
    tuple[Literal["url"], str],
    tuple[Literal["title"], str],
    tuple[Literal["description"], str],
]
OldSearchPageInfo: TypeAlias = search.ScoredValue[UnscoredPageInfo]
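
# Illustrative example (hypothetical values, not taken from the module): an
# UnscoredPageInfo is a tuple of key/value pairs, for instance
#     (("url", "https://example.org/suche"),
#      ("title", "Suche"),
#      ("description", "Seite zum Durchsuchen der Webseite")),
# and an OldSearchPageInfo pairs such a tuple with a relevance score.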

def get_module_info() -> ModuleInfo:
    """Create and return the ModuleInfo for this module."""
    return ModuleInfo(
        handlers=(
            (r"/suche", Search),
            (r"/api/suche", SearchAPIHandler),
        ),
        name="Suche",
        description="Seite zum Durchsuchen der Webseite",
        aliases=("/search",),
        keywords=("Suche",),
        path="/suche",
    )


class Search(HTMLRequestHandler):
    """The request handler for the search page."""

    def convert_page_info_to_simple_tuple(
        self, page_info: PageInfo
    ) -> UnscoredPageInfo:
        """Convert PageInfo to tuple of tuples."""
        return (
            ("url", self.fix_url(page_info.path)),
            ("title", page_info.name),
            ("description", page_info.description),
        )

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search page."""
        if head:
            return
        await self.render(
            "pages/search.html",
            query=self.get_query(),
            results=await self.search(),
        )

    def get_all_page_info(self) -> Stream[PageInfo]:
        """Return all page infos that can be found."""
        return (
            Stream(self.get_module_infos())
            .flat_map(lambda mi: mi.sub_pages + (mi,))
            .exclude(lambda pi: pi.hidden)
            .filter(lambda pi: pi.path)
        )

    def get_query(self) -> str:
        """Return the query."""
        return str(self.get_argument("q", ""))

    async def search(self) -> list[dict[str, float | str]]:
        """Search the website."""
        result: list[dict[str, str | float]] | None = None
        if query := self.get_query():
            try:
                result = await self.search_new(query)
            except Exception:  # pylint: disable=broad-except
                LOGGER.exception("App Search request failed")
                if self.apm_client:
                    self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
        if result is not None:
            return result
        return self.search_old(query)

    @requires_settings("APP_SEARCH", return_=AwaitableValue(None))
    @get_setting_or_default("APP_SEARCH_ENGINE", NAME.removesuffix("-dev"))
    async def search_new(  # type: ignore[no-any-unimported]
        self,
        query: str,
        *,
        app_search: Any = ...,
        app_search_engine: str = ...,  # type: ignore[assignment]
    ) -> list[dict[str, str | float]] | None:
        """Search the website using Elastic App Search."""
        return [
            {
                "url": self.fix_url(result["url_path"]["raw"]),
                "title": result["title"]["snippet"],
                "description": result["meta_description"]["snippet"],
                "score": result["_meta"]["score"],
            }
            for result in (
                await asyncio.to_thread(
                    app_search.search,
                    app_search_engine,
                    body={
                        "query": query,
                        "filters": {
                            "none": {
                                "quote_rating": {
                                    "to": 1,
                                },
                            },
                        },
                        "result_fields": {
                            "title": {
                                "snippet": {
                                    "size": 50,
                                    "fallback": True,
                                }
                            },
                            "meta_description": {
                                "snippet": {
                                    "size": 200,
                                    "fallback": True,
                                }
                            },
                            "url_path": {
                                "raw": {},
                            },
                        },
                    },
                )
            )["results"]
        ]

    def search_old(
        self, query: str, limit: int = 20
    ) -> list[dict[str, str | float]]:
        """Search the website using the old search engine."""
        page_infos = self.search_old_internal(query)

        page_infos.sort(reverse=True)

        return [
            dict(scored_value.value + (("score", scored_value.score),))
            for scored_value in page_infos[:limit]
        ]

    def search_old_internal(self, query: str) -> list[OldSearchPageInfo]:
        """Search authors and quotes."""
        if not (query_object := search.Query(query)):
            return list(
                self.get_all_page_info()
                .map(self.convert_page_info_to_simple_tuple)
                .map(lambda unscored: search.ScoredValue(1, unscored))
            )
        pages: search.DataProvider[PageInfo, UnscoredPageInfo] = (
            search.DataProvider(
                self.get_all_page_info,
                lambda page_info: (
                    page_info.name,
                    page_info.description,
                    *page_info.keywords,
                ),
                self.convert_page_info_to_simple_tuple,
            )
        )
        soundboard: search.DataProvider[SoundInfo, UnscoredPageInfo] = (
            search.DataProvider(
                ALL_SOUNDS,
                lambda sound_info: (
                    sound_info.text,
                    sound_info.person.value,
                ),
                lambda sound_info: (
                    (
                        "url",
                        self.fix_url(
                            f"/soundboard/{sound_info.person.name}#{sound_info.filename}"
                        ),
                    ),
                    ("title", f"Soundboard ({sound_info.person.value})"),
                    ("description", sound_info.text),
                ),
            )
        )
        authors: search.DataProvider[Author, UnscoredPageInfo] = (
            search.DataProvider(
                get_authors,
                lambda author: author.name,
                lambda author: (
                    ("url", self.fix_url(author.get_path())),
                    ("title", "Autoren-Info"),
                    ("description", author.name),
                ),
            )
        )
        quotes: search.DataProvider[Quote, UnscoredPageInfo] = (
            search.DataProvider(
                get_quotes,
                lambda quote: (quote.quote, quote.author.name),
                lambda q: (
                    ("url", self.fix_url(q.get_path())),
                    ("title", "Zitat-Info"),
                    ("description", str(q)),
                ),
            )
        )
        wrong_quotes: search.DataProvider[WrongQuote, UnscoredPageInfo] = (
            search.DataProvider(
                lambda: get_wrong_quotes(lambda wq: wq.rating > 0),
                lambda wq: (wq.quote.quote, wq.author.name),
                lambda wq: (
                    ("url", self.fix_url(wq.get_path())),
                    ("title", "Falsches Zitat"),
                    ("description", str(wq)),
                ),
            )
        )
        return search.search(
            query_object,
            cast(search.DataProvider[object, UnscoredPageInfo], pages),
            cast(search.DataProvider[object, UnscoredPageInfo], soundboard),
            cast(search.DataProvider[object, UnscoredPageInfo], authors),
            cast(search.DataProvider[object, UnscoredPageInfo], quotes),
            cast(search.DataProvider[object, UnscoredPageInfo], wrong_quotes),
        )


class SearchAPIHandler(APIRequestHandler, Search):
    """The request handler for the search API."""

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search page."""
        if head:
            return
        await self.finish(json.dumps(await self.search()))
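
SearchAPIHandler serves the same results as JSON under /api/suche. As a rough sketch (the base URL and port below are placeholders, not taken from this module), the endpoint could be queried like this:

import json
from urllib.parse import urlencode
from urllib.request import urlopen

# Placeholder base URL; point this at a running an_website instance.
BASE_URL = "http://localhost:8888"

query_string = urlencode({"q": "Zitat"})
with urlopen(f"{BASE_URL}/api/suche?{query_string}") as response:
    results = json.loads(response.read())

# Each entry mirrors the dicts built by Search.search():
# {"url": ..., "title": ..., "description": ..., "score": ...}
for entry in results:
    print(f'{entry["score"]:.2f}  {entry["title"]}  ->  {entry["url"]}')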