Coverage for an_website/utils/search.py: 70.149%
67 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-16 19:56 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-16 19:56 +0000
1# This program is free software: you can redistribute it and/or modify
2# it under the terms of the GNU Affero General Public License as
3# published by the Free Software Foundation, either version 3 of the
4# License, or (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Affero General Public License for more details.
10#
11# You should have received a copy of the GNU Affero General Public License
12# along with this program. If not, see <https://www.gnu.org/licenses/>.
14"""Module used for easy and simple searching in O(n) complexity."""
16from __future__ import annotations
18import dataclasses
19from collections.abc import Callable, Iterable, Iterator, Sequence
20from typing import Generic, NoReturn, TypeVar
22import regex as re
23from typed_stream import Stream
25T = TypeVar("T")
26U = TypeVar("U")
27V = TypeVar("V")
class Query:
    """Immutable, hashable search query.

    The raw query string is lower-cased and then split on runs of
    non-word characters.  The lower-cased string, the resulting words and
    the summed length of those words are stored together in ``_data``.
    Two queries are equal when their lower-cased query strings are equal.
    """

    _data: tuple[str, Sequence[str], int]
    __slots__ = ("_data",)

    def __bool__(self) -> bool:
        """Return whether the query contains at least one word."""
        return bool(self.words)

    def __eq__(self, other: object) -> bool:
        """Compare by lower-cased query string, consistent with __hash__."""
        if not isinstance(other, Query):
            return NotImplemented
        return self.query == other.query

    def __hash__(self) -> int:
        """Hash the lower-cased query string."""
        return hash(self.query)

    def __init__(self, query: str) -> None:
        """Normalize *query* and store it together with its words.

        Raises:
            ValueError: if this instance has already been initialized.
        """
        if hasattr(self, "_data"):
            raise ValueError("Already initialized.")
        query = query.lower()
        # pylint: disable=bad-builtin
        words = tuple(filter(None, re.split(r"\W+", query)))
        words_len = sum(len(word) for word in words)
        # __setattr__ is blocked below, so go through object directly.
        object.__setattr__(self, "_data", (query, words, words_len))

    def __reduce__(self) -> "tuple[type[Query], tuple[str]]":
        """Reduce this object to its class and its query string."""
        return Query, (self.query,)

    def __repr__(self) -> str:
        """Return a string representation of self."""
        return f"Query({self.query!r})"

    def __setattr__(self, key: object, value: object) -> NoReturn:
        """Raise an AttributeError, because queries are read-only."""
        raise AttributeError("Cannot modify Query.")

    @property
    def query(self) -> str:
        """The lower-cased query string."""
        return self._data[0]

    def score(self, field_values: tuple[str, ...]) -> float:
        """Score how well *field_values* match this query.

        Field values need to be a tuple of lower-cased strings.

        Returns:
            ``1.0`` if the whole query string is contained in any field.
            Otherwise every word contributes its share of the total word
            length for each field that contains it, and the sum is
            divided by the number of fields.  ``0.0`` if there are no
            fields at all.
        """
        if not field_values:
            # Nothing to match against; also avoids dividing by zero.
            return 0.0
        if any(self.query in val for val in field_values):
            return 1.0
        return sum(
            (
                sum(word in value for value in field_values)
                * (len(word) / self.words_len)
            )
            for word in self.words
        ) / len(field_values)

    @property
    def words(self) -> Sequence[str]:
        """The words in the query."""
        return self._data[1]

    @property
    def words_len(self) -> int:
        """Return sum(len(word) for word in self.words)."""
        return self._data[2]
@dataclasses.dataclass(order=True, slots=True, frozen=True)
class ScoredValue(Generic[T]):
    """Pair a value with the score it was given.

    Instances are frozen.  The generated ordering compares the fields in
    declaration order, so ``score`` is compared before ``value``.
    """

    # The score assigned to the value.
    score: float
    # The value the score belongs to.
    value: T
class DataProvider(Generic[T, U]):
    """Searchable source of values.

    A provider bundles the data itself (or a callable producing it), a
    ``key`` function extracting the text to search in, and a ``convert``
    function turning a matching value into the result that is returned.
    """

    __slots__ = ("_data", "_key", "_convert")

    _data: Iterable[T] | Callable[[], Iterable[T]]
    _key: Callable[[T], str | tuple[str, ...]]
    _convert: Callable[[T], U]

    def __init__(
        self,
        data: Iterable[T] | Callable[[], Iterable[T]],
        key: Callable[[T], str | tuple[str, ...]],
        convert: Callable[[T], U],
    ) -> None:
        """Store the data source, the key function and the converter."""
        self._data = data
        self._key = key
        self._convert = convert

    def _value_to_fields(self, value: T) -> tuple[str, ...]:
        """Return the lower-cased search fields of *value* as a tuple."""
        extracted = self._key(value)
        if isinstance(extracted, str):
            # A single string becomes a one-element tuple.
            return (extracted.lower(),)
        return tuple(map(str.lower, extracted))  # pylint: disable=bad-builtin

    @property
    def data(self) -> Iterable[T]:
        """Return the data, calling the stored source if it is not iterable."""
        source = self._data
        if isinstance(source, Iterable):
            return source
        return source()

    def search(
        self, query: Query, excl_min_score: float = 0.0
    ) -> Iterator[ScoredValue[U]]:
        """Yield the converted values scoring above *excl_min_score*.

        The bound is exclusive: a value whose score equals
        *excl_min_score* is not yielded.
        """
        for item in self.data:
            rating = query.score(self._value_to_fields(item))
            if rating > excl_min_score:
                yield ScoredValue(rating, self._convert(item))
def search(
    query: Query,
    *providers: DataProvider[object, T],
    excl_min_score: float = 0.0,
) -> list[ScoredValue[T]]:
    """Search all providers and return the matches sorted by score.

    Every provider is searched with the same *query* and the same
    exclusive lower bound *excl_min_score*.  The combined matches are
    returned in ascending score order.
    """
    matches = list(
        Stream(providers).flat_map(
            lambda provider: provider.search(query, excl_min_score)
        )
    )
    # list.sort() is stable, so matches with equal scores keep their order.
    matches.sort(key=lambda match: match.score)
    return matches
154 )