Coverage for an_website/quotes/info.py: 80.682%
88 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-10 18:56 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-10 18:56 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
14"""Info page to show information about authors and quotes."""
16import logging
17from datetime import datetime, timedelta, timezone
18from typing import Final, cast
19from urllib.parse import quote as quote_url
21import orjson as json
22import regex
23from tornado.httpclient import AsyncHTTPClient, HTTPClientError
24from tornado.web import HTTPError
26from .. import CA_BUNDLE_PATH, EVENT_REDIS
27from ..utils.request_handler import HTMLRequestHandler
28from .utils import get_author_by_id, get_quote_by_id, get_wrong_quotes
# Logger of this module; used to report failed or empty Wikipedia lookups.
LOGGER: Final = logging.getLogger(__name__)
class QuotesInfoPage(HTMLRequestHandler):
    """Request handler that renders the info page of a single quote."""

    RATELIMIT_GET_LIMIT = 30

    async def get(self, id_str: str, *, head: bool = False) -> None:
        """
        Render the info page of the quote identified by id_str.

        Raise a 404 error if there is no quote with that ID.
        If head is true, the lookup is done but nothing gets rendered.
        """
        quote_id: int = int(id_str)
        quote = await get_quote_by_id(quote_id)
        if quote is None:
            raise HTTPError(404)
        if head:
            return
        await self.render(
            "pages/quotes/quote_info.html",
            quote=quote,
            # all wrong quotes that use this quote, sorted
            wrong_quotes=get_wrong_quotes(
                lambda wrong_quote: wrong_quote.quote_id == quote_id,
                sort=True,
            ),
            title="Zitat-Informationen",
            short_title="Zitat-Info",
            type="Zitat",
            id=quote.id,
            text=str(quote),
            description=f"Falsch zugeordnete Zitate mit „{quote}“ als Zitat.",
            create_kwargs={"quote": quote.id},
        )
# API endpoints of the German and the English Wikipedia.
# The German one is the default for lookups, the English one is the fallback
# that search_wikipedia tries when the German search finds nothing.
WIKI_API_DE: Final[str] = "https://de.wikipedia.org/w/api.php"
WIKI_API_EN: Final[str] = "https://en.wikipedia.org/w/api.php"
async def search_wikipedia(
    query: str, api: str = WIKI_API_DE
) -> None | tuple[str, None | str, datetime]:
    """
    Look the query up on Wikipedia with an opensearch request.

    If nothing is found and api is WIKI_API_DE, the search is repeated
    with WIKI_API_EN.

    Return None if the query is empty or nothing was found. Otherwise
    return a tuple with the URL of the page, the content of the page
    (may be None) and the current time in UTC.
    """
    if not query:
        return None
    search_url = (
        f"{api}?action=opensearch&namespace=0&profile=normal&"
        f"search={quote_url(query)}&limit=1&redirects=resolve&format=json"
    )
    response = await AsyncHTTPClient().fetch(
        search_url, ca_certs=CA_BUNDLE_PATH
    )
    results = json.loads(response.body)
    # index 1 holds the found page names, index 3 the matching URLs
    page_names = results[1]
    if page_names:
        # "," in the URL of the page gets replaced with "%2C"
        page_url = str(results[3][0]).replace(",", "%2C")
        content = await get_wikipedia_page_content(page_names[0], api)
        return page_url, content, datetime.now(timezone.utc)
    if api == WIKI_API_DE:
        # nothing found on the German Wikipedia, try the English one
        return await search_wikipedia(query, WIKI_API_EN)
    return None  # nothing found
async def get_wikipedia_page_content(
    page_name: str, api: str = WIKI_API_DE
) -> None | str:
    """
    Get the plain-text extract of a Wikipedia page and return it.

    The request asks for the intro of the page only (exintro), as plain
    text (explaintext), limited to five sentences (exsentences=5).

    Return None if the response contains no pages or if the first page
    in the response has no extract.
    """
    response = await AsyncHTTPClient().fetch(
        (
            f"{api}?action=query&prop=extracts&exsectionformat=plain&exintro&"
            f"titles={quote_url(page_name)}&explaintext&format=json&exsentences=5"
        ),
        ca_certs=CA_BUNDLE_PATH,
    )
    response_json = json.loads(response.body)
    pages = response_json.get("query", {}).get("pages")
    if not pages:
        # missing or empty "pages"; indexing the first page would fail
        return None
    # only one title was requested, so only the first page is of interest
    page = cast(dict[str, str], next(iter(pages.values())))
    return page.get("extract")
def fix_author_for_wikipedia_search(author: str) -> str:
    """
    Fix author for Wikipedia search.

    This tries to reduce common problems with authors.
    So that we can show more information.

    The following steps are applied in order:
    runs of whitespace are collapsed to a single space, text in
    parentheses is removed, a trailing "Werbespruch" or "Werbung" is
    removed and a leading "nach" or "Ein" is removed.
    The word removals are case-insensitive.
    """
    author = regex.sub(r"\s+", " ", author)
    author = regex.sub(r"\s*\(.*\)", "", author)
    # The flags have to be passed by keyword, because the fourth
    # positional parameter of sub() is count, not flags.
    author = regex.sub(
        r"\s*Werbespruch$", "", author, flags=regex.IGNORECASE
    )
    author = regex.sub(r"\s*Werbung$", "", author, flags=regex.IGNORECASE)
    # \b makes sure only the word "nach" gets removed, so names that
    # merely start with these letters (e.g. "Nachtigall") stay intact
    author = regex.sub(r"^nach\b\s*", "", author, flags=regex.IGNORECASE)
    author = regex.sub(r"^Ein\s+", "", author, flags=regex.IGNORECASE)
    return author
# time to live of the cached author info in seconds (30 days, roughly 1 month)
AUTHOR_INFO_NEW_TTL: Final[int] = 60 * 60 * 24 * 30
class AuthorsInfoPage(HTMLRequestHandler):
    """The request handler used for the author info page."""

    RATELIMIT_GET_LIMIT = 5

    async def get(self, id_str: str, *, head: bool = False) -> None:
        """
        Handle GET requests to the author info page.

        Raise a 404 error if there is no author with the given ID.
        If head is true, nothing gets rendered.

        If the author has no info yet, the info is taken from Redis
        (if Redis is available and has it) or searched on Wikipedia.
        Info found on Wikipedia that has content gets saved in Redis
        for AUTHOR_INFO_NEW_TTL seconds.
        """
        # pylint: disable=too-complex
        author_id: int = int(id_str)
        author = await get_author_by_id(author_id)
        if author is None:
            raise HTTPError(404)
        if head:
            return
        if author.info is None:
            fixed_author_name = fix_author_for_wikipedia_search(author.name)
            redis_key = self.get_redis_info_key(fixed_author_name)
            result = None
            if EVENT_REDIS.is_set():
                # try to get the info from Redis
                result = await self.redis.get(redis_key)
            # the saved value has the form "<url>|<content>"
            info = result.split("|", maxsplit=1) if result else []
            if len(info) > 1:
                remaining_ttl = await self.redis.ttl(redis_key)
                # The key was saved with AUTHOR_INFO_NEW_TTL, so the part
                # of the TTL that already elapsed is the age of the info.
                # Negative values mean that there is no usable TTL.
                creation_date = datetime.now(tz=timezone.utc) - timedelta(
                    seconds=AUTHOR_INFO_NEW_TTL - max(remaining_ttl, 0)
                )
                author.info = (info[0], info[1], creation_date)
            else:
                try:
                    author.info = await search_wikipedia(fixed_author_name)
                except HTTPClientError as err:
                    LOGGER.error("Searching wikipedia failed: %s", err)
                if author.info is None or author.info[1] is None:
                    # nothing found
                    LOGGER.info("No information found about %s", repr(author))
                elif EVENT_REDIS.is_set():
                    await self.redis.setex(
                        redis_key,
                        AUTHOR_INFO_NEW_TTL,
                        # value to save (the author info)
                        # type is ignored, because author.info[1] is not None
                        "|".join(author.info[0:2]),  # type: ignore[arg-type]
                    )

        wqs = get_wrong_quotes(
            lambda wq: wq.author_id == author_id,
            sort=True,
        )

        await self.render(
            "pages/quotes/author_info.html",
            author=author,
            wrong_quotes=wqs,
            title="Autor-Informationen",
            short_title="Autor-Info",
            type="Autor",
            id=author_id,
            text=str(author),
            description=f"Falsch zugeordnete Zitate mit „{author}“ als Autor.",
            create_kwargs={"author": author_id},
        )

    def get_redis_info_key(self, author_name: str) -> str:
        """Get the key to save the author info with Redis."""
        return f"{self.redis_prefix}:quote-author-info:{author_name}"