Coverage for an_website/search/search.py: 73.438% (64 statements) — coverage.py v7.6.4, created at 2024-11-16 19:56 +0000

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

13 

14"""The search page used to search the website.""" 

15 

16from __future__ import annotations 

17 

18import asyncio 

19import logging 

20from typing import Final, Literal, TypeAlias, cast 

21 

22import orjson as json 

23from elastic_enterprise_search import AppSearch # type: ignore[import-untyped] 

24from typed_stream import Stream 

25 

26from .. import NAME 

27from ..quotes.utils import ( 

28 Author, 

29 Quote, 

30 WrongQuote, 

31 get_authors, 

32 get_quotes, 

33 get_wrong_quotes, 

34) 

35from ..soundboard.data import ALL_SOUNDS, SoundInfo 

36from ..utils import search 

37from ..utils.decorators import get_setting_or_default, requires_settings 

38from ..utils.request_handler import APIRequestHandler, HTMLRequestHandler 

39from ..utils.utils import AwaitableValue, ModuleInfo, PageInfo 

40 

# Module-level logger named after this module.
LOGGER: Final = logging.getLogger(__name__)

# A search result without a score, as a tuple of (key, value) pairs:
# (("url", ...), ("title", ...), ("description", ...)).
UnscoredPageInfo: TypeAlias = tuple[
    tuple[Literal["url"], str],
    tuple[Literal["title"], str],
    tuple[Literal["description"], str],
]
# An UnscoredPageInfo together with its relevance score.
OldSearchPageInfo: TypeAlias = search.ScoredValue[UnscoredPageInfo]

49 

50 

def get_module_info() -> ModuleInfo:
    """Create and return the ModuleInfo for this module."""
    # HTML page and JSON API share the same search logic.
    handlers = (
        (r"/suche", Search),
        (r"/api/suche", SearchAPIHandler),
    )
    return ModuleInfo(
        handlers=handlers,
        name="Suche",
        description="Seite zum Durchsuchen der Webseite",
        aliases=("/search",),
        keywords=("Suche",),
        path="/suche",
    )

64 

65 

class Search(HTMLRequestHandler):
    """The request handler for the search page."""

    def convert_page_info_to_simple_tuple(
        self, page_info: PageInfo
    ) -> UnscoredPageInfo:
        """Convert a PageInfo into an UnscoredPageInfo tuple of pairs."""
        url = self.fix_url(page_info.path)
        return (
            ("url", url),
            ("title", page_info.name),
            ("description", page_info.description),
        )

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests to the search page."""
        if head:
            return
        query = self.get_query()
        results = await self.search()
        await self.render(
            "pages/search.html",
            query=query,
            results=results,
        )

    def get_all_page_info(self) -> Stream[PageInfo]:
        """Return a stream of all visible page infos that have a path."""
        return (
            Stream(self.get_module_infos())
            # Each module contributes its sub-pages plus itself.
            .flat_map(lambda module_info: module_info.sub_pages + (module_info,))
            .exclude(lambda page_info: page_info.hidden)
            .filter(lambda page_info: page_info.path)
        )

    def get_query(self) -> str:
        """Return the search query from the ?q= request argument."""
        return str(self.get_argument("q", ""))

    async def search(self) -> list[dict[str, float | str]]:
        """Search the website.

        Try Elastic App Search first and fall back to the old search
        when App Search is unavailable, unconfigured, or fails.
        """
        query = self.get_query()
        if query:
            try:
                new_results = await self.search_new(query)
            except Exception:  # pylint: disable=broad-except
                # Log and report, then fall back to the old engine.
                LOGGER.exception("App Search request failed")
                if self.apm_client:
                    self.apm_client.capture_exception()  # type: ignore[no-untyped-call]
            else:
                # None means App Search is not configured (see decorator).
                if new_results is not None:
                    return new_results
        return self.search_old(query)

    @requires_settings("APP_SEARCH", return_=AwaitableValue(None))
    @get_setting_or_default("APP_SEARCH_ENGINE", NAME.removesuffix("-dev"))
    async def search_new(  # type: ignore[no-any-unimported]
        self,
        query: str,
        *,
        app_search: AppSearch = ...,
        app_search_engine: str = ...,  # type: ignore[assignment]
    ) -> list[dict[str, str | float]] | None:
        """Search the website using Elastic App Search.

        The app_search client and engine name are injected by the
        decorators from the application settings.
        """
        # The App Search client is blocking, so run it in a thread.
        response = await asyncio.to_thread(
            app_search.search,
            app_search_engine,
            body={
                "query": query,
                # "none" filter excludes documents matching it; presumably
                # this drops badly rated quotes — confirm against the schema.
                "filters": {
                    "none": {
                        "quote_rating": {
                            "to": 1,
                        },
                    },
                },
                "result_fields": {
                    "title": {
                        "snippet": {
                            "size": 50,
                            "fallback": True,
                        }
                    },
                    "meta_description": {
                        "snippet": {
                            "size": 200,
                            "fallback": True,
                        }
                    },
                    "url_path": {
                        "raw": {},
                    },
                },
            },
        )
        return [
            {
                "url": self.fix_url(hit["url_path"]["raw"]),
                "title": hit["title"]["snippet"],
                "description": hit["meta_description"]["snippet"],
                "score": hit["_meta"]["score"],
            }
            for hit in response["results"]
        ]

    def search_old(
        self, query: str, limit: int = 20
    ) -> list[dict[str, str | float]]:
        """Search the website using the old search engine.

        Return at most limit results, best scores first.
        """
        scored_values = sorted(self.search_old_internal(query), reverse=True)
        return [
            dict(scored.value + (("score", scored.score),))
            for scored in scored_values[:limit]
        ]

    def search_old_internal(self, query: str) -> list[OldSearchPageInfo]:
        """Search pages, sounds, authors and quotes without App Search."""
        query_object = search.Query(query)
        if not query_object:
            # Empty query: return every page with a neutral score of 1.
            return [
                search.ScoredValue(1, unscored)
                for unscored in self.get_all_page_info().map(
                    self.convert_page_info_to_simple_tuple
                )
            ]
        # Static pages of the website.
        pages: search.DataProvider[PageInfo, UnscoredPageInfo] = (
            search.DataProvider(
                self.get_all_page_info,
                lambda page_info: (
                    page_info.name,
                    page_info.description,
                    *page_info.keywords,
                ),
                self.convert_page_info_to_simple_tuple,
            )
        )
        # Soundboard sounds, linked by person and filename anchor.
        soundboard: search.DataProvider[SoundInfo, UnscoredPageInfo] = (
            search.DataProvider(
                ALL_SOUNDS,
                lambda sound_info: (
                    sound_info.text,
                    sound_info.person.value,
                ),
                lambda sound_info: (
                    (
                        "url",
                        self.fix_url(
                            f"/soundboard/{sound_info.person.name}#{sound_info.filename}"
                        ),
                    ),
                    ("title", f"Soundboard ({sound_info.person.value})"),
                    ("description", sound_info.text),
                ),
            )
        )
        # Quote authors, searchable by name.
        authors: search.DataProvider[Author, UnscoredPageInfo] = (
            search.DataProvider(
                get_authors,
                lambda author: author.name,
                lambda author: (
                    ("url", self.fix_url(author.get_path())),
                    ("title", "Autoren-Info"),
                    ("description", author.name),
                ),
            )
        )
        # Quotes, searchable by text and author name.
        quotes: search.DataProvider[Quote, UnscoredPageInfo] = (
            search.DataProvider(
                get_quotes,
                lambda quote: (quote.quote, quote.author.name),
                lambda quote: (
                    ("url", self.fix_url(quote.get_path())),
                    ("title", "Zitat-Info"),
                    ("description", str(quote)),
                ),
            )
        )
        # Wrong quotes, restricted to positively rated ones.
        wrong_quotes: search.DataProvider[WrongQuote, UnscoredPageInfo] = (
            search.DataProvider(
                lambda: get_wrong_quotes(lambda wrong_quote: wrong_quote.rating > 0),
                lambda wrong_quote: (
                    wrong_quote.quote.quote,
                    wrong_quote.author.name,
                ),
                lambda wrong_quote: (
                    ("url", self.fix_url(wrong_quote.get_path())),
                    ("title", "Falsches Zitat"),
                    ("description", str(wrong_quote)),
                ),
            )
        )
        providers = (pages, soundboard, authors, quotes, wrong_quotes)
        return search.search(
            query_object,
            *(
                cast(search.DataProvider[object, UnscoredPageInfo], provider)
                for provider in providers
            ),
        )

260 

261 

class SearchAPIHandler(APIRequestHandler, Search):
    """The request handler for the search API."""

    async def get(self, *, head: bool = False) -> None:
        """Handle GET requests by returning the search results as JSON."""
        if head:
            return
        results = await self.search()
        await self.finish(json.dumps(results))