Coverage for an_website/utils/search.py: 70.149%

67 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-16 19:56 +0000

1# This program is free software: you can redistribute it and/or modify 

2# it under the terms of the GNU Affero General Public License as 

3# published by the Free Software Foundation, either version 3 of the 

4# License, or (at your option) any later version. 

5# 

6# This program is distributed in the hope that it will be useful, 

7# but WITHOUT ANY WARRANTY; without even the implied warranty of 

8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

9# GNU Affero General Public License for more details. 

10# 

11# You should have received a copy of the GNU Affero General Public License 

12# along with this program. If not, see <https://www.gnu.org/licenses/>. 

13 

14"""Module used for easy and simple searching in O(n) complexity.""" 

15 

16from __future__ import annotations 

17 

18import dataclasses 

19from collections.abc import Callable, Iterable, Iterator, Sequence 

20from typing import Generic, NoReturn, TypeVar 

21 

22import regex as re 

23from typed_stream import Stream 

24 

25T = TypeVar("T") 

26U = TypeVar("U") 

27V = TypeVar("V") 

28 

29 

class Query:
    """Class representing a query.

    The query string is lower-cased and split into words on runs of
    non-word characters once, at construction time. Instances are
    immutable: the parsed data lives in a single slot that is written
    exactly once and every later attribute assignment raises.
    """

    _data: tuple[str, Sequence[str], int]
    __slots__ = ("_data",)

    def __bool__(self) -> bool:
        """Return False if this query matches everything."""
        return bool(self.words)

    def __hash__(self) -> int:
        """Hash this based on the lower-cased query string."""
        return hash(self.query)

    def __init__(self, query: str) -> None:
        """Initialize this.

        Raises ValueError if the instance has already been initialized.
        """
        if hasattr(self, "_data"):
            raise ValueError("Already initialized.")
        query = query.lower()
        # pylint: disable=bad-builtin
        # filter(None, ...) drops the empty strings re.split produces when
        # the query starts or ends with non-word characters.
        words = tuple(filter(None, re.split(r"\W+", query)))
        words_len = sum(len(word) for word in words)
        # __setattr__ is overridden to always raise, so go through object.
        object.__setattr__(self, "_data", (query, words, words_len))

    def __reduce__(self) -> tuple[type[Query], tuple[str]]:
        """Reduce this object to the class and the query string."""
        return Query, (self.query,)

    def __repr__(self) -> str:
        """Return a string representation of self."""
        return f"Query({self.query!r})"

    def __setattr__(self, key: object, value: object) -> NoReturn:
        """Raise an AttributeError."""
        raise AttributeError("Cannot modify Query.")

    @property
    def query(self) -> str:
        """The original query (lower-cased)."""
        return self._data[0]

    def score(self, field_values: tuple[str, ...]) -> float:
        """Return how well the given field values match this query.

        Field values needs to be a tuple of lower cased strings.

        Returns 1.0 if the whole query is a substring of any field.
        Otherwise each word contributes its share of the total word
        length once per field containing it, and the sum is divided by
        the number of fields. Returns 0.0 if there are no fields.
        """
        if not field_values:
            # Nothing to match against; avoids dividing by zero below.
            return 0.0
        if any(self.query in val for val in field_values):
            return 1.0
        return sum(
            (
                sum(word in value for value in field_values)
                * (len(word) / self.words_len)
            )
            for word in self.words
        ) / len(field_values)

    @property
    def words(self) -> Sequence[str]:
        """The words in the query."""
        return self._data[1]

    @property
    def words_len(self) -> int:
        """Return sum(len(word) for word in self.words)."""
        return self._data[2]

92 

93 

94@dataclasses.dataclass(frozen=True, slots=True, order=True) 

95class ScoredValue(Generic[T]): 

96 """Value with score.""" 

97 

98 score: float 

99 value: T 

100 

101 

class DataProvider(Generic[T, U]):
    """Searchable source of values.

    Bundles the data (an iterable, or a callable producing one), a key
    function that extracts the text field(s) of a value, and a converter
    applied to every value that ends up in the results.
    """

    __slots__ = ("_data", "_key", "_convert")

    _data: Iterable[T] | Callable[[], Iterable[T]]
    _key: Callable[[T], str | tuple[str, ...]]
    _convert: Callable[[T], U]

    def __init__(
        self,
        data: Iterable[T] | Callable[[], Iterable[T]],
        key: Callable[[T], str | tuple[str, ...]],
        convert: Callable[[T], U],
    ) -> None:
        """Store the data source, the key function and the converter."""
        self._data = data
        self._key = key
        self._convert = convert

    def _value_to_fields(self, value: T) -> tuple[str, ...]:
        """Return the lower-cased text fields of a value as a tuple."""
        fields = self._key(value)
        if isinstance(fields, str):
            # A single string is treated as one field.
            return (fields.lower(),)
        return tuple([str.lower(field) for field in fields])

    @property
    def data(self) -> Iterable[T]:
        """The data; a non-iterable source is called to obtain it."""
        if isinstance(self._data, Iterable):
            return self._data
        return self._data()

    def search(
        self, query: Query, excl_min_score: float = 0.0
    ) -> Iterator[ScoredValue[U]]:
        """Yield the converted values scoring above excl_min_score."""
        for item in self.data:
            item_score = query.score(self._value_to_fields(item))
            if not item_score > excl_min_score:
                continue
            yield ScoredValue(item_score, self._convert(item))

143 

144 

def search(
    query: Query,
    *providers: DataProvider[object, T],
    excl_min_score: float = 0.0,
) -> list[ScoredValue[T]]:
    """Search all providers and return the hits sorted by score.

    Every provider is searched with the same query and minimum score;
    the combined hits are returned in ascending order of their score.
    """

    def hits_of(provider: DataProvider[object, T]) -> Iterator[ScoredValue[T]]:
        """Return the hits of a single provider."""
        return provider.search(query, excl_min_score)

    hits = list(Stream(providers).flat_map(hits_of))
    # Sort on the score alone so that the values never get compared.
    hits.sort(key=lambda hit: hit.score)
    return hits