1from __future__ import annotations
2
3import re
4import unicodedata
5from typing import Any
6
7from plain.utils.functional import SimpleLazyObject, keep_lazy_text, lazy
8from plain.utils.regex_helper import _lazy_re_compile
9
# Regular expressions used by Truncator._truncate_html.
# re.DOTALL makes "." match newlines as well.

# Either a tag-like "<...>" span, or (group 1) one word: a run of characters
# that are not "<", ">", or whitespace.
_re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.DOTALL)

# Either a tag-like "<...>" span, or (group 1) any single character.
_re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.DOTALL)

# Dissects one tag: group 1 is "/" for a closing tag, group 2 is the tag
# name, group 3 is the trailing "/" (with optional leading whitespace) of a
# self-closing tag. Anything after whitespace following the name is skipped.
_re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.DOTALL)
14
15
class Truncator(SimpleLazyObject):
    """
    An object used to truncate text, either by characters or words.

    The text is converted with ``str()`` inside a callable handed to
    ``SimpleLazyObject``; the public methods call ``_setup()`` before reading
    ``_wrapped``.
    """

    _wrapped: str  # Override parent type since we always store str

    def __init__(self, text: Any):
        super().__init__(lambda: str(text))

    def add_truncation_text(self, text: str, truncate: str | None = None) -> str:
        """
        Return `text` combined with the truncation marker `truncate`.

        When `truncate` is None it defaults to ``"%(truncated_text)s…"``. A
        marker containing the ``%(truncated_text)s`` placeholder is
        %-interpolated with `text`; any other marker is appended to `text`,
        unless `text` already ends with it.
        """
        if truncate is None:
            truncate = "%(truncated_text)s…"
        if "%(truncated_text)s" in truncate:
            return truncate % {"truncated_text": text}
        # The truncation text didn't contain the %(truncated_text)s string
        # replacement argument so just append it to the text.
        if text.endswith(truncate):
            # But don't append the truncation text if the current text already
            # ends in this.
            return text
        return f"{text}{truncate}"

    def chars(self, num: int, truncate: str | None = None, html: bool = False) -> str:
        """
        Return the text truncated to be no longer than the specified number
        of characters.

        `truncate` specifies what should be used to notify that the string has
        been truncated, defaulting to an ellipsis ("…"). The text is NFC
        normalized first and combining characters are not counted. When
        `html` is true, tags are not counted (see `_truncate_html`).
        """
        self._setup()
        length = int(num)
        text = unicodedata.normalize("NFC", self._wrapped)

        # Calculate the length to truncate to (max length - end_text length).
        # Each non-combining character of the marker uses up one character of
        # the budget; stop as soon as the budget reaches zero.
        truncate_len = length
        for char in self.add_truncation_text("", truncate):
            if not unicodedata.combining(char):
                truncate_len -= 1
                if truncate_len == 0:
                    break
        if html:
            return self._truncate_html(length, truncate, text, truncate_len, False)
        return self._text_chars(length, truncate, text, truncate_len)

    def _text_chars(
        self, length: int, truncate: str | None, text: str, truncate_len: int
    ) -> str:
        """
        Truncate a string after a certain number of chars.

        `length` is the maximum number of counted characters allowed before
        truncation happens; `truncate_len` is how many characters of `text`
        are kept in front of the truncation marker when it does.
        """
        s_len = 0
        end_index = None
        for i, char in enumerate(text):
            if unicodedata.combining(char):
                # Don't consider combining characters
                # as adding to the string length
                continue
            s_len += 1
            if end_index is None and s_len > truncate_len:
                # Remember where the kept prefix ends; only used if the text
                # turns out to be longer than `length`.
                end_index = i
            if s_len > length:
                # Return the truncated string
                return self.add_truncation_text(text[: end_index or 0], truncate)

        # Return the original string since no truncation was necessary
        return text

    def words(self, num: int, truncate: str | None = None, html: bool = False) -> str:
        """
        Truncate a string after a certain number of words. `truncate` specifies
        what should be used to notify that the string has been truncated,
        defaulting to ellipsis.

        When `html` is true, tags are not counted as words (see
        `_truncate_html`).
        """
        self._setup()
        length = int(num)
        if html:
            return self._truncate_html(length, truncate, self._wrapped, length, True)
        return self._text_words(length, truncate)

    def _text_words(self, length: int, truncate: str | None) -> str:
        """
        Truncate a string after a certain number of words.

        Strip newlines in the string: words are re-joined with single spaces.
        A negative `length` is treated as zero.
        """
        words = self._wrapped.split()
        # Clamp the limit: a negative slice bound would count from the end of
        # the list and keep all but the last word(s) instead of none.
        limit = max(length, 0)
        if len(words) > limit:
            return self.add_truncation_text(" ".join(words[:limit]), truncate)
        return " ".join(words)

    def _truncate_html(
        self,
        length: int,
        truncate: str | None,
        text: str,
        truncate_len: int,
        words: bool,
    ) -> str:
        """
        Truncate HTML to a certain number of chars (not counting tags and
        comments), or, if words is True, then to a certain number of words.
        Close opened tags if they were correctly closed in the given HTML.

        Preserve newlines in the HTML.

        `length` is the maximum number of counted units before truncation
        happens; `truncate_len` is the number of units kept in front of the
        truncation marker. Return "" when counting words and `length` is not
        positive.
        """
        if words and length <= 0:
            return ""

        # Elements that never take a closing tag, so they are never tracked
        # as open.
        html4_singlets = (
            "br",
            "col",
            "link",
            "base",
            "img",
            "param",
            "area",
            "hr",
            "input",
        )

        # Count non-HTML chars/words and keep note of open tags
        pos = 0
        end_text_pos = 0
        current_len = 0
        open_tags = []  # most recently opened tag first

        regex = _re_words if words else _re_chars

        while current_len <= length:
            m = regex.search(text, pos)
            if not m:
                # Checked through whole string
                break
            pos = m.end(0)
            if m[1]:
                # It's an actual non-HTML word or char
                current_len += 1
                if current_len == truncate_len:
                    # The kept part of the text ends right after this unit.
                    end_text_pos = pos
                continue
            # Check for tag
            tag = _re_tag.match(m[0])
            if not tag or current_len >= truncate_len:
                # Don't worry about non tags or tags after our truncate point
                continue
            closing_tag, tagname, self_closing = tag.groups()
            # Element names are always case-insensitive
            tagname = tagname.lower()
            if self_closing or tagname in html4_singlets:
                pass
            elif closing_tag:
                # Check for match in open tags list
                try:
                    i = open_tags.index(tagname)
                except ValueError:
                    # Closing tag without a tracked opening tag: ignore it.
                    pass
                else:
                    # SGML: An end tag closes, back to the matching start tag,
                    # all unclosed intervening start tags with omitted end tags
                    open_tags = open_tags[i + 1 :]
            else:
                # Add it to the start of the open tags list
                open_tags.insert(0, tagname)

        if current_len <= length:
            # The loop ran out of text before exceeding the limit.
            return text
        out = text[:end_text_pos]
        truncate_text = self.add_truncation_text("", truncate)
        if truncate_text:
            out += truncate_text
        # Close any tags still open
        for tag in open_tags:
            out += f"</{tag}>"
        # Return string
        return out
192
193
@keep_lazy_text
def slugify(value: Any, allow_unicode: bool = False) -> str:
    """
    Build a slug from `value`.

    The value is converted with ``str()``. Unless `allow_unicode` is true it
    is reduced to ASCII (NFKD decomposition, non-ASCII dropped); otherwise it
    is NFKC normalized. The result is lowercased, characters other than word
    characters, whitespace, and hyphens are removed, each run of whitespace
    and/or hyphens becomes a single hyphen, and leading and trailing hyphens
    and underscores are stripped.
    """
    text = str(value)
    if not allow_unicode:
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")
    else:
        text = unicodedata.normalize("NFKC", text)
    cleaned = re.sub(r"[^\w\s-]", "", text.lower())
    hyphenated = re.sub(r"[-\s]+", "-", cleaned)
    return hyphenated.strip("-_")
213
214
def pluralize(singular: str, plural: str, number: int) -> str:
    """Return `singular` when `number` equals 1, otherwise `plural`."""
    return singular if number == 1 else plural
220
221
def _lazy_number_unpickle(
    func: Any, resultclass: Any, number: Any, kwargs: dict[str, Any]
) -> Any:
    """
    Rebuild a number-aware lazy proxy from the values returned by its
    ``__reduce__``.

    Lives at module level because pickle stores functions by qualified name
    and cannot serialize a function defined inside another function.
    """
    return _lazy_number(func, resultclass, number=number, **kwargs)


def _lazy_number(
    func: Any, resultclass: Any, number: int | str | None = None, **kwargs: Any
) -> Any:
    """
    Return a lazy proxy for ``func(**kwargs, number=...)``.

    If `number` is an int it is passed straight through to `func` via
    ``lazy``. Otherwise the proxy is a `resultclass` subclass that picks the
    number when it is formatted:

    * ``proxy.format(...)``: if keyword arguments are given and `number` is
      truthy, the number is ``kwargs[number]``; otherwise it is the first
      positional argument.
    * ``proxy % rhs``: if `rhs` is a dict and `number` is truthy, the number
      is ``rhs[number]``; otherwise `rhs` itself is the number.

    A missing key raises ``KeyError`` with an explanatory message.
    """
    if isinstance(number, int):
        kwargs["number"] = number
        proxy = lazy(func, resultclass)(**kwargs)
    else:
        # Snapshot of the arguments for pickling.
        original_kwargs = kwargs.copy()

        class NumberAwareString(resultclass):
            def __bool__(self) -> bool:
                return bool(kwargs["singular"])

            def _get_number_value(self, values: dict[str, Any]) -> Any:
                try:
                    return values[number]  # ty: ignore[invalid-argument-type]
                except KeyError:
                    # The message below fully describes the problem, so the
                    # original lookup error is not chained.
                    raise KeyError(
                        f"Your dictionary lacks key '{number}'. Please provide "
                        "it, because it is required to determine whether "
                        "string is singular or plural."
                    ) from None

            def _translate(self, number_value: int) -> str:
                # Build a fresh argument dict so the shared closure state is
                # not mutated on every rendering.
                return func(**{**kwargs, "number": number_value})

            def format(self, *args: Any, **kwargs: Any) -> str:
                # NOTE: `kwargs` here is the format() keyword arguments, which
                # shadow the enclosing function's kwargs.
                number_value = (
                    self._get_number_value(kwargs) if kwargs and number else args[0]
                )
                return self._translate(number_value).format(*args, **kwargs)

            def __mod__(self, rhs: Any) -> str:
                if isinstance(rhs, dict) and number:
                    number_value = self._get_number_value(rhs)
                else:
                    number_value = rhs
                translated = self._translate(number_value)
                try:
                    translated %= rhs
                except TypeError:
                    # String doesn't contain a placeholder for the number.
                    pass
                return translated

        proxy = lazy(lambda **kwargs: NumberAwareString(), NumberAwareString)(
            **kwargs
        )
        # NOTE(review): assumes the proxy returned by `lazy` honors an
        # instance-level __reduce__ when pickled; confirm against `lazy`.
        proxy.__reduce__ = lambda: (
            _lazy_number_unpickle,
            (func, resultclass, number, original_kwargs),
        )
    return proxy


def pluralize_lazy(singular: str, plural: str, number: int | str) -> Any:
    """
    Return a lazy proxy that resolves to `singular` or `plural` via
    `pluralize`.

    `number` is either the count itself (an int) or the name of the key that
    will hold the count when the result is later formatted with ``%`` or
    ``str.format``.
    """
    return _lazy_number(
        pluralize, str, singular=singular, plural=plural, number=number
    )