Coverage for an_website / quotes / info.py: 82.558%
86 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-04 20:05 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-04 20:05 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
14"""Info page to show information about authors and quotes."""
16from __future__ import annotations
18import logging
19from datetime import datetime, timedelta, timezone
20from typing import Final, cast
21from urllib.parse import quote as quote_url
23import orjson as json
24import regex
25from tornado.httpclient import AsyncHTTPClient
26from tornado.web import HTTPError
28from .. import CA_BUNDLE_PATH, EVENT_REDIS
29from ..utils.request_handler import HTMLRequestHandler
30from .utils import get_author_by_id, get_quote_by_id, get_wrong_quotes
# Module-level logger, named after this module.
LOGGER: Final = logging.getLogger(__name__)
class QuotesInfoPage(HTMLRequestHandler):
    """Request handler that renders the info page of a single quote."""

    RATELIMIT_GET_LIMIT = 30

    async def get(self, id_str: str, *, head: bool = False) -> None:
        """
        Render the info page for the quote with the given ID.

        Raise HTTPError(404) if no quote with that ID is found.
        If head is true, return after the lookup without rendering.
        """
        quote_id: int = int(id_str)
        quote = await get_quote_by_id(quote_id)
        if quote is None:
            raise HTTPError(404)
        if head:
            return

        def uses_this_quote(wrong_quote) -> bool:  # type: ignore[no-untyped-def]
            """Tell whether the wrong quote is built from this quote."""
            return wrong_quote.quote_id == quote_id  # type: ignore[no-any-return]

        template_args = {
            "quote": quote,
            "wrong_quotes": get_wrong_quotes(uses_this_quote, sort=True),
            "title": "Zitat-Informationen",
            "short_title": "Zitat-Info",
            "type": "Zitat",
            "id": quote.id,
            "text": str(quote),
            "description": f"Falsch zugeordnete Zitate mit „{quote}“ als Zitat.",
            "create_kwargs": {"quote": quote.id},
        }
        await self.render("pages/quotes/quote_info.html", **template_args)
# MediaWiki API endpoints of the German and the English Wikipedia
WIKI_API_DE: Final[str] = "https://de.wikipedia.org/w/api.php"
WIKI_API_EN: Final[str] = "https://en.wikipedia.org/w/api.php"
async def search_wikipedia(
    query: str, api: str = WIKI_API_DE
) -> None | tuple[str, None | str, datetime]:
    """
    Search Wikipedia for the query and fetch the best matching page.

    Return None if the query is empty or nothing was found.
    Otherwise return a tuple of the page URL, the page content
    (which may be None) and the current UTC time.

    If api is the German Wikipedia and it yields no result,
    the English Wikipedia is searched as well.
    """
    if not query:
        return None

    # only the German API gets the English one as a fallback
    endpoints = (api, WIKI_API_EN) if api == WIKI_API_DE else (api,)

    for endpoint in endpoints:
        search_url = (
            f"{endpoint}?action=opensearch&namespace=0&profile=normal&"
            f"search={quote_url(query)}&limit=1&redirects=resolve&format=json"
        )
        response = await AsyncHTTPClient().fetch(
            search_url, ca_certs=CA_BUNDLE_PATH
        )
        search_result = json.loads(response.body)
        # index 1 holds the found page names, index 3 the matching URLs
        page_names = search_result[1]
        if not page_names:
            continue  # nothing found with this endpoint
        # "," in the URL gets replaced with "%2C"
        page_url = str(search_result[3][0]).replace(",", "%2C")
        content = await get_wikipedia_page_content(page_names[0], endpoint)
        return page_url, content, datetime.now(timezone.utc)

    return None  # nothing found
async def get_wikipedia_page_content(
    page_name: str, api: str = WIKI_API_DE
) -> None | str:
    """
    Get the plain-text intro extract of a Wikipedia page.

    The request asks the API for the intro of the page as plain text,
    limited to five sentences (exintro, explaintext, exsentences=5).

    Return the "extract" of the first page in the response, or None
    if the response contains no pages or the page has no extract.
    """
    response = await AsyncHTTPClient().fetch(
        (
            f"{api}?action=query&prop=extracts&exsectionformat=plain&exintro&"
            f"titles={quote_url(page_name)}&explaintext&format=json&exsentences=5"
        ),
        ca_certs=CA_BUNDLE_PATH,
    )
    response_json = json.loads(response.body)
    if "query" not in response_json or "pages" not in response_json["query"]:
        return None
    # the pages are keyed by their page ID and map to the page data
    pages: dict[str, dict[str, str]] = response_json["query"]["pages"]
    if not pages:
        # an empty mapping has no first page to read the extract from
        return None
    page = cast(dict[str, str], next(iter(pages.values())))
    if "extract" not in page:
        return None
    return page["extract"]
def fix_author_for_wikipedia_search(author: str) -> str:
    """
    Fix author for Wikipedia search.

    This tries to reduce common problems with authors,
    so that we can show more information.

    The following clean-ups are applied in order:
    whitespace runs are collapsed to one space, a parenthesised part
    is removed, a trailing "Werbespruch" or "Werbung" is removed and
    a leading "nach" or "Ein " is removed (all four ignoring case).
    """
    author = regex.sub(r"\s+", " ", author)
    author = regex.sub(r"\s*\(.*\)", "", author)
    # The flags have to be passed by keyword: the fourth positional
    # parameter of sub() is count, not flags.
    author = regex.sub(r"\s*Werbespruch$", "", author, flags=regex.IGNORECASE)
    author = regex.sub(r"\s*Werbung$", "", author, flags=regex.IGNORECASE)
    # NOTE(review): "\s*" also strips "nach" from the start of longer
    # words (e.g. "Nachtigall") - confirm whether "\s+" is intended.
    author = regex.sub(r"^nach\s*", "", author, flags=regex.IGNORECASE)
    author = regex.sub(r"^Ein\s+", "", author, flags=regex.IGNORECASE)
    return author
# how long author info is kept in Redis: 30 days (1 month), in seconds
AUTHOR_INFO_NEW_TTL: Final[int] = 30 * 24 * 60 * 60
class AuthorsInfoPage(HTMLRequestHandler):
    """Request handler that renders the info page of a single author."""

    RATELIMIT_GET_LIMIT = 5

    async def get(self, id_str: str, *, head: bool = False) -> None:
        """
        Render the info page for the author with the given ID.

        Raise HTTPError(404) if no author with that ID is found.
        If head is true, return after the lookup without rendering.

        If the author has no info yet, it is read from Redis (if Redis
        is available) or searched on Wikipedia; a successful Wikipedia
        result is then saved in Redis for AUTHOR_INFO_NEW_TTL seconds.
        """
        author_id: int = int(id_str)
        author = await get_author_by_id(author_id)
        if author is None:
            raise HTTPError(404)
        if head:
            return
        if author.info is None:
            fixed_author_name = fix_author_for_wikipedia_search(author.name)
            redis_key = self.get_redis_info_key(fixed_author_name)
            result = None
            if EVENT_REDIS.is_set():
                # try to get the info from Redis
                result = await self.redis.get(redis_key)
            # the saved value has the form "<url>|<content>"
            info = result.split("|", maxsplit=1) if result else []
            if len(info) > 1:
                remaining_ttl = await self.redis.ttl(redis_key)
                # The creation date is not saved, so it gets derived
                # from how much of the initial TTL has already elapsed.
                creation_date = datetime.now(tz=timezone.utc) - timedelta(
                    seconds=AUTHOR_INFO_NEW_TTL - remaining_ttl
                )
                author.info = (info[0], info[1], creation_date)
            else:
                author.info = await search_wikipedia(fixed_author_name)
                if author.info is None or author.info[1] is None:
                    # nothing found
                    LOGGER.info("No information found about %s", repr(author))
                elif EVENT_REDIS.is_set():
                    await self.redis.setex(
                        redis_key,
                        AUTHOR_INFO_NEW_TTL,
                        # value to save (the author info)
                        # type is ignored, because author.info[1] is not None
                        "|".join(author.info[0:2]),  # type: ignore[arg-type]
                    )

        wqs = get_wrong_quotes(
            lambda wq: wq.author_id == author_id,
            sort=True,
        )

        await self.render(
            "pages/quotes/author_info.html",
            author=author,
            wrong_quotes=wqs,
            title="Autor-Informationen",
            short_title="Autor-Info",
            type="Autor",
            id=author_id,
            text=str(author),
            description=f"Falsch zugeordnete Zitate mit „{author}“ als Autor.",
            create_kwargs={"author": author_id},
        )

    def get_redis_info_key(self, author_name: str) -> str:
        """Get the key to save the author info with Redis."""
        return f"{self.redis_prefix}:quote-author-info:{author_name}"