Coverage for an_website / utils / search.py: 66.667%

60 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-22 18:49 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13 

14"""Module used for easy and simple searching in O(n) complexity.""" 

15 

16 

17import dataclasses 

18from collections.abc import Callable, Iterable, Iterator, Sequence 

19from typing import Generic, NoReturn, TypeVar 

20 

21import regex as re 

22from typed_stream import Stream 

23 

# Type variables used by the generic classes and functions of this module.
T = TypeVar("T")
U = TypeVar("U")
V = TypeVar("V")

27 

28 

class Query:
    """Class representing an immutable, normalized search query.

    The query string is lower-cased and split into words on runs of
    non-word characters once, when the object is created.
    """

    # (lower-cased query, words of the query, summed length of the words)
    _data: tuple[str, Sequence[str], int]
    __slots__ = ("_data",)

    def __bool__(self) -> bool:
        """Return whether this query contains at least one word."""
        return bool(self.words)

    def __eq__(self, other: object) -> bool:
        """Return whether other is a Query with the same normalized text.

        Defined so that equality agrees with __hash__, which is also
        based on the normalized query string.
        """
        if not isinstance(other, Query):
            return NotImplemented
        return self.query == other.query

    def __hash__(self) -> int:
        """Hash this by its normalized query string."""
        return hash(self.query)

    def __init__(self, query: str) -> None:
        """Normalize the query and store the derived data.

        Raises a ValueError if this object has already been initialized.
        """
        if hasattr(self, "_data"):
            raise ValueError("Already initialized.")
        query = query.lower()
        # splitting can produce empty strings at the edges; drop them
        words = tuple(word for word in re.split(r"\W+", query) if word)
        words_len = sum(len(word) for word in words)
        # go through object, because __setattr__ of this class always raises
        object.__setattr__(self, "_data", (query, words, words_len))

    # The annotation is quoted, because the name Query is not bound yet
    # while the class body runs on interpreters that evaluate annotations
    # eagerly.
    def __reduce__(self) -> "tuple[type[Query], tuple[str]]":
        """Reduce this object to its class and the normalized query."""
        return Query, (self.query,)

    def __repr__(self) -> str:
        """Return a string representation of self."""
        return f"Query({self.query!r})"

    def __setattr__(self, key: object, value: object) -> NoReturn:
        """Raise an AttributeError, because queries are read-only."""
        raise AttributeError("Cannot modify Query.")

    @property
    def query(self) -> str:
        """The lower-cased query string."""
        return self._data[0]

    def score(self, field_values: tuple[str, ...]) -> float:
        """Score the field values against this query.

        Field values needs to be a tuple of lower cased strings.

        Return 1.0 if the whole query is a substring of any field value.
        Otherwise every word counts the number of field values containing
        it, weighted by its share of the total word length, and the sum
        is divided by the number of field values.
        Return 0.0 if there are no field values at all.
        """
        if not field_values:
            # nothing to match against; also avoids dividing by zero below
            return 0.0
        if any(self.query in val for val in field_values):
            return 1.0
        return sum(
            (
                sum(word in value for value in field_values)
                * (len(word) / self.words_len)
            )
            for word in self.words
        ) / len(field_values)

    @property
    def words(self) -> Sequence[str]:
        """The words in the query."""
        return self._data[1]

    @property
    def words_len(self) -> int:
        """Return sum(len(word) for word in self.words)."""
        return self._data[2]

91 

92 

@dataclasses.dataclass(order=True, frozen=True, slots=True)
class ScoredValue(Generic[T]):
    """A value together with the score it was given.

    Instances are immutable and compare by score first, then by value.
    """

    score: float
    value: T

99 

100 

class DataProvider(Generic[T, U]):
    """Searchable source of values.

    Wraps the data (or a callable returning it), a function extracting
    the text field(s) of a value and a function converting a value to
    the type that gets returned from a search.
    """

    _data: Iterable[T] | Callable[[], Iterable[T]]
    _key: Callable[[T], str | tuple[str, ...]]
    _convert: Callable[[T], U]

    __slots__ = ("_data", "_key", "_convert")

    def __init__(
        self,
        data: Iterable[T] | Callable[[], Iterable[T]],
        key: Callable[[T], str | tuple[str, ...]],
        convert: Callable[[T], U],
    ) -> None:
        """Store the data source, the key function and the converter."""
        self._convert = convert
        self._key = key
        self._data = data

    def _value_to_fields(self, value: T) -> tuple[str, ...]:
        """Return the lower-cased text field(s) of value as a tuple."""
        fields = self._key(value)
        if isinstance(fields, str):
            # a single string is treated as exactly one field
            return (fields.lower(),)
        return tuple(str.lower(field) for field in fields)

    @property
    def data(self) -> Iterable[T]:
        """Return the data, calling the stored source if it isn't iterable."""
        source = self._data
        if isinstance(source, Iterable):
            return source
        return source()

    def search(
        self, query: Query, excl_min_score: float = 0.0
    ) -> Iterator[ScoredValue[U]]:
        """Lazily yield the converted values scoring above excl_min_score."""
        for item in self.data:
            item_fields = self._value_to_fields(item)
            item_score = query.score(item_fields)
            if item_score > excl_min_score:
                yield ScoredValue(item_score, self._convert(item))

142 

143 

def search(
    query: Query,
    *providers: DataProvider[object, T],
    excl_min_score: float = 0.0,
) -> list[ScoredValue[T]]:
    """Search every provider and return all matches sorted by score.

    The same query and exclusive minimum score are passed to each
    provider; the combined matches are sorted ascending by score.
    """

    def matches_of(
        provider: DataProvider[object, T],
    ) -> Iterator[ScoredValue[T]]:
        """Return the matches of a single provider."""
        return provider.search(query, excl_min_score)

    all_matches = Stream(providers).flat_map(matches_of)
    return sorted(all_matches, key=lambda scored: scored.score)