Coverage for an_website / search / search.py: 72.581%

62 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-22 18:49 +0000

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

13 

14"""The search page used to search the website.""" 

15 

16 

17import asyncio 

18import logging 

19from typing import Any, Final, Literal, TypeAlias, cast 

20 

21import orjson as json 

22from typed_stream import Stream 

23 

24from .. import NAME 

25from ..quotes.utils import ( 

26 Author, 

27 Quote, 

28 WrongQuote, 

29 get_authors, 

30 get_quotes, 

31 get_wrong_quotes, 

32) 

33from ..soundboard.data import ALL_SOUNDS, SoundInfo 

34from ..utils import search 

35from ..utils.decorators import get_setting_or_default, requires_settings 

36from ..utils.request_handler import APIRequestHandler, HTMLRequestHandler 

37from ..utils.utils import AwaitableValue, ModuleInfo, PageInfo 

38 

LOGGER: Final = logging.getLogger(__name__)

# An unscored search result: a fixed-order triple of ("url", ...),
# ("title", ...) and ("description", ...) pairs. The fixed order means
# it is hashable and can be fed straight to dict() to build the
# serializable result object (see Search.search_old).
UnscoredPageInfo: TypeAlias = tuple[
    tuple[Literal["url"], str],
    tuple[Literal["title"], str],
    tuple[Literal["description"], str],
]
# An unscored result paired with its relevance score from the old
# (fallback) search engine.
OldSearchPageInfo: TypeAlias = search.ScoredValue[UnscoredPageInfo]

48 

def get_module_info() -> ModuleInfo:
    """Create and return the ModuleInfo for this module."""
    # Both the HTML page and the JSON API are registered here.
    handlers = (
        (r"/suche", Search),
        (r"/api/suche", SearchAPIHandler),
    )
    return ModuleInfo(
        handlers=handlers,
        name="Suche",
        description="Seite zum Durchsuchen der Webseite",
        aliases=("/search",),
        keywords=("Suche",),
        path="/suche",
    )

62 

63 

class Search(HTMLRequestHandler):
    """The request handler for the search page."""

    def convert_page_info_to_simple_tuple(
        self, page_info: PageInfo
    ) -> UnscoredPageInfo:
        """Convert PageInfo to tuple of tuples."""
        url = self.fix_url(page_info.path)
        return (
            ("url", url),
            ("title", page_info.name),
            ("description", page_info.description),
        )

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search page."""
        if head:
            return
        await self.render(
            "pages/search.html",
            query=self.get_query(),
            results=await self.search(),
        )

    def get_all_page_info(self) -> Stream[PageInfo]:
        """Return all page infos that can be found."""
        # Each module contributes its sub-pages plus itself; hidden
        # pages and pages without a path are not searchable.
        every_info = Stream(self.get_module_infos()).flat_map(
            lambda module_info: module_info.sub_pages + (module_info,)
        )
        return every_info.exclude(lambda info: info.hidden).filter(
            lambda info: info.path
        )

    def get_query(self) -> str:
        """Return the query."""
        argument = self.get_argument("q", "")
        return str(argument)

    async def search(self) -> list[dict[str, float | str]]:
        """Search the website."""
        query = self.get_query()
        new_results: list[dict[str, str | float]] | None = None
        if query:
            try:
                new_results = await self.search_new(query)
            except Exception:  # pylint: disable=broad-except
                # Best effort: log, report to APM, then fall back to
                # the old search engine below.
                LOGGER.exception("App Search request failed")
                if self.apm_client:
                    self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
        if new_results is None:
            return self.search_old(query)
        return new_results

    @requires_settings("APP_SEARCH", return_=AwaitableValue(None))
    @get_setting_or_default("APP_SEARCH_ENGINE", NAME.removesuffix("-dev"))
    async def search_new(
        self,
        query: str,
        *,
        app_search: Any = ...,
        app_search_engine: str = ...,  # type: ignore[assignment]
    ) -> list[dict[str, str | float]] | None:
        """Search the website using Elastic App Search."""
        # Filter out badly rated quotes and request snippets for the
        # fields shown on the results page.
        body = {
            "query": query,
            "filters": {
                "none": {
                    "quote_rating": {
                        "to": 1,
                    },
                },
            },
            "result_fields": {
                "title": {
                    "snippet": {
                        "size": 50,
                        "fallback": True,
                    }
                },
                "meta_description": {
                    "snippet": {
                        "size": 200,
                        "fallback": True,
                    }
                },
                "url_path": {
                    "raw": {},
                },
            },
        }
        # The client is blocking, so run the request in a thread.
        response = await asyncio.to_thread(
            app_search.search, app_search_engine, body=body
        )
        return [
            {
                "url": self.fix_url(hit["url_path"]["raw"]),
                "title": hit["title"]["snippet"],
                "description": hit["meta_description"]["snippet"],
                "score": hit["_meta"]["score"],
            }
            for hit in response["results"]
        ]

    def search_old(
        self, query: str, limit: int = 20
    ) -> list[dict[str, str | float]]:
        """Search the website using the old search engine."""
        scored_pages = self.search_old_internal(query)
        # Best scores first.
        scored_pages.sort(reverse=True)
        results: list[dict[str, str | float]] = []
        for scored_value in scored_pages[:limit]:
            results.append(
                dict(scored_value.value + (("score", scored_value.score),))
            )
        return results

    def search_old_internal(self, query: str) -> list[OldSearchPageInfo]:
        """Search authors and quotes."""
        query_object = search.Query(query)
        if not query_object:
            # Empty query: every page matches equally well.
            return list(
                self.get_all_page_info()
                .map(self.convert_page_info_to_simple_tuple)
                .map(lambda unscored: search.ScoredValue(1, unscored))
            )
        pages: search.DataProvider[PageInfo, UnscoredPageInfo] = (
            search.DataProvider(
                self.get_all_page_info,
                lambda info: (info.name, info.description, *info.keywords),
                self.convert_page_info_to_simple_tuple,
            )
        )
        soundboard: search.DataProvider[SoundInfo, UnscoredPageInfo] = (
            search.DataProvider(
                ALL_SOUNDS,
                lambda sound: (sound.text, sound.person.value),
                lambda sound: (
                    (
                        "url",
                        self.fix_url(
                            f"/soundboard/{sound.person.name}#{sound.filename}"
                        ),
                    ),
                    ("title", f"Soundboard ({sound.person.value})"),
                    ("description", sound.text),
                ),
            )
        )
        authors: search.DataProvider[Author, UnscoredPageInfo] = (
            search.DataProvider(
                get_authors,
                lambda author: author.name,
                lambda author: (
                    ("url", self.fix_url(author.get_path())),
                    ("title", "Autoren-Info"),
                    ("description", author.name),
                ),
            )
        )
        quotes: search.DataProvider[Quote, UnscoredPageInfo] = (
            search.DataProvider(
                get_quotes,
                lambda quote: (quote.quote, quote.author.name),
                lambda quote: (
                    ("url", self.fix_url(quote.get_path())),
                    ("title", "Zitat-Info"),
                    ("description", str(quote)),
                ),
            )
        )
        # Only well-rated wrong quotes are searchable.
        wrong_quotes: search.DataProvider[WrongQuote, UnscoredPageInfo] = (
            search.DataProvider(
                lambda: get_wrong_quotes(lambda wq: wq.rating > 0),
                lambda wq: (wq.quote.quote, wq.author.name),
                lambda wq: (
                    ("url", self.fix_url(wq.get_path())),
                    ("title", "Falsches Zitat"),
                    ("description", str(wq)),
                ),
            )
        )
        return search.search(
            query_object,
            cast(search.DataProvider[object, UnscoredPageInfo], pages),
            cast(search.DataProvider[object, UnscoredPageInfo], soundboard),
            cast(search.DataProvider[object, UnscoredPageInfo], authors),
            cast(search.DataProvider[object, UnscoredPageInfo], quotes),
            cast(search.DataProvider[object, UnscoredPageInfo], wrong_quotes),
        )

258 

259 

class SearchAPIHandler(APIRequestHandler, Search):
    """The request handler for the search API."""

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search page."""
        if head:
            return
        results = await self.search()
        await self.finish(json.dumps(results))