Coverage for an_website / quotes / info.py: 82.353%
85 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-31 10:27 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-31 10:27 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
14"""Info page to show information about authors and quotes."""
17import logging
18from datetime import datetime, timedelta, timezone
19from typing import Final, cast
20from urllib.parse import quote as quote_url
22import orjson as json
23import regex
24from tornado.httpclient import AsyncHTTPClient
25from tornado.web import HTTPError
27from .. import CA_BUNDLE_PATH, EVENT_REDIS
28from ..utils.request_handler import HTMLRequestHandler
29from .utils import get_author_by_id, get_quote_by_id, get_wrong_quotes
31LOGGER: Final = logging.getLogger(__name__)
class QuotesInfoPage(HTMLRequestHandler):
    """Request handler that serves the info page of a single quote."""

    # NOTE(review): presumably picked up by the base handler's rate limiting
    # for GET requests; confirm against HTMLRequestHandler
    RATELIMIT_GET_LIMIT = 30

    async def get(self, id_str: str, *, head: bool = False) -> None:
        """
        Render the info page of the quote with the given ID.

        Raise a 404 error if there is no quote with this ID.
        If ``head`` is true, stop after that check without rendering.
        """
        quote_id = int(id_str)
        if (quote := await get_quote_by_id(quote_id)) is None:
            raise HTTPError(404)
        if head:
            return
        # every wrong quote that is built from this quote, sorted
        matching_wrong_quotes = get_wrong_quotes(
            lambda wrong_quote: wrong_quote.quote_id == quote_id,
            sort=True,
        )
        await self.render(
            "pages/quotes/quote_info.html",
            quote=quote,
            wrong_quotes=matching_wrong_quotes,
            title="Zitat-Informationen",
            short_title="Zitat-Info",
            type="Zitat",
            id=quote.id,
            text=str(quote),
            description=f"Falsch zugeordnete Zitate mit „{quote}“ als Zitat.",
            create_kwargs={"quote": quote.id},
        )
# Wikipedia API endpoints: the German one is the default for lookups,
# search_wikipedia retries with the English one if German finds nothing
WIKI_API_DE: Final[str] = "https://de.wikipedia.org/w/api.php"
WIKI_API_EN: Final[str] = "https://en.wikipedia.org/w/api.php"
async def search_wikipedia(
    query: str, api: str = WIKI_API_DE
) -> None | tuple[str, None | str, datetime]:
    """
    Look the query up on Wikipedia.

    Return None if the query is empty or nothing was found. Otherwise
    return a tuple with the URL of the best match, the content of that
    page (None if it could not be fetched) and the current UTC time.

    If the German Wikipedia has no match, the English one is searched.
    """
    if not query:
        return None
    search_url = (
        f"{api}?action=opensearch&namespace=0&profile=normal&"
        f"search={quote_url(query)}&limit=1&redirects=resolve&format=json"
    )
    response = await AsyncHTTPClient().fetch(
        search_url, ca_certs=CA_BUNDLE_PATH
    )
    search_result = json.loads(response.body)
    # index 1 holds the page names found, index 3 the URLs of these pages
    page_names = search_result[1]
    if page_names:
        page_name = page_names[0]
        # get the URL of the content & replace "," with "%2C"
        url = str(search_result[3][0]).replace(",", "%2C")
        content = await get_wikipedia_page_content(page_name, api)
        return url, content, datetime.now(timezone.utc)
    if api == WIKI_API_DE:
        # nothing found in German, so try again in English
        return await search_wikipedia(query, WIKI_API_EN)
    return None  # nothing found
async def get_wikipedia_page_content(
    page_name: str, api: str = WIKI_API_DE
) -> None | str:
    """
    Get the intro text of a Wikipedia page and return it.

    Request a plain-text extract of the intro of the page (the request
    asks for at most five sentences). Return None if the response
    contains no pages or the first page has no extract.
    """
    response = await AsyncHTTPClient().fetch(
        (
            f"{api}?action=query&prop=extracts&exsectionformat=plain&exintro&"
            f"titles={quote_url(page_name)}&explaintext&format=json&exsentences=5"
        ),
        ca_certs=CA_BUNDLE_PATH,
    )
    response_json = json.loads(response.body)
    if "query" not in response_json or "pages" not in response_json["query"]:
        return None
    # "pages" maps keys to page objects; only the "extract" of the first
    # page object is of interest here
    pages = cast(dict[str, dict[str, str]], response_json["query"]["pages"])
    # an empty "pages" object means there is nothing to show,
    # so return None instead of failing with an IndexError
    first_page = next(iter(pages.values()), None)
    if first_page is None:
        return None
    return first_page.get("extract")
def fix_author_for_wikipedia_search(author: str) -> str:
    """
    Fix author for Wikipedia search.

    This tries to reduce common problems with authors,
    so that we can show more information:

    - runs of whitespace are collapsed to a single space
    - text in parentheses is removed
    - a trailing "Werbespruch" or "Werbung" is removed
    - a leading "nach" or "Ein" is removed

    The prefix and suffix removal ignores case.
    """
    author = regex.sub(r"\s+", " ", author)
    author = regex.sub(r"\s*\(.*\)", "", author)
    # The flags have to be given by keyword, because the fourth positional
    # argument of sub() is the maximum count of replacements, not the flags.
    for pattern in (
        r"\s*Werbespruch$",
        r"\s*Werbung$",
        # \b makes sure only the whole word "nach" gets removed,
        # not the start of a name like "Nachtigall"
        r"^nach\b\s*",
        r"^Ein\s+",
    ):
        author = regex.sub(pattern, "", author, flags=regex.IGNORECASE)
    return author
# how long author info stays cached in Redis: 1 month (30 days), in seconds
AUTHOR_INFO_NEW_TTL: Final[int] = 30 * 24 * 60 * 60  # days * h * min * s
class AuthorsInfoPage(HTMLRequestHandler):
    """The request handler used for the author info page."""

    # NOTE(review): presumably picked up by the base handler's rate limiting
    # for GET requests; confirm against HTMLRequestHandler
    RATELIMIT_GET_LIMIT = 5

    async def get(self, id_str: str, *, head: bool = False) -> None:
        """
        Handle GET requests to the author info page.

        Raise a 404 error if there is no author with the given ID.
        If ``head`` is true, stop after that check without rendering.

        If the author has no info yet, it is taken from Redis (if Redis
        is available and has an entry) or searched on Wikipedia. Info
        found on Wikipedia is saved in Redis for AUTHOR_INFO_NEW_TTL
        seconds.
        """
        author_id: int = int(id_str)
        author = await get_author_by_id(author_id)
        if author is None:
            raise HTTPError(404)
        if head:
            return
        if author.info is None:
            fixed_author_name = fix_author_for_wikipedia_search(author.name)
            cached_info = None
            if EVENT_REDIS.is_set():
                # try to get the info from Redis
                cached_info = await self.redis.get(
                    self.get_redis_info_key(fixed_author_name)
                )
            # entries are saved as "<url>|<content>", so the separator
            # is only non-empty if a usable entry was found
            url, separator, content = (cached_info or "").partition("|")
            if separator:
                remaining_ttl = await self.redis.ttl(
                    self.get_redis_info_key(fixed_author_name)
                )
                # entries are saved with a TTL of AUTHOR_INFO_NEW_TTL,
                # the part of it that is used up is the age of the entry
                creation_date = datetime.now(tz=timezone.utc) - timedelta(
                    seconds=AUTHOR_INFO_NEW_TTL - remaining_ttl
                )
                author.info = (url, content, creation_date)
            else:
                author.info = await search_wikipedia(fixed_author_name)
                if author.info is None or author.info[1] is None:
                    # nothing found
                    LOGGER.info("No information found about %r", author)
                elif EVENT_REDIS.is_set():
                    await self.redis.setex(
                        self.get_redis_info_key(fixed_author_name),
                        AUTHOR_INFO_NEW_TTL,
                        # value to save (the author info)
                        # type is ignored, because author.info[1] is not None
                        "|".join(author.info[0:2]),  # type: ignore[arg-type]
                    )

        wqs = get_wrong_quotes(
            lambda wq: wq.author_id == author_id,
            sort=True,
        )

        await self.render(
            "pages/quotes/author_info.html",
            author=author,
            wrong_quotes=wqs,
            title="Autor-Informationen",
            short_title="Autor-Info",
            type="Autor",
            id=author_id,
            text=str(author),
            description=f"Falsch zugeordnete Zitate mit „{author}“ als Autor.",
            create_kwargs={"author": author_id},
        )

    def get_redis_info_key(self, author_name: str) -> str:
        """Get the key to save the author info with Redis."""
        return f"{self.redis_prefix}:quote-author-info:{author_name}"