v0.150.0
  1from __future__ import annotations
  2
  3import re
  4import unicodedata
  5from typing import Any
  6
  7from plain.utils.functional import SimpleLazyObject, keep_lazy_text, lazy
  8from plain.utils.regex_helper import _lazy_re_compile
  9
# Set up regular expressions
# All three patterns use re.S so that "." also matches newlines.
#
# _re_words: matches either a whole "<...>" tag (no capture) or a run of
# characters containing no "<", ">" or whitespace, captured as group 1.
_re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
# _re_chars: matches either a whole "<...>" tag (no capture) or any single
# character (including whitespace and newlines), captured as group 1.
_re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.S)
# _re_tag: dissects a tag. Group 1 is "/" for an end tag, group 2 is the tag
# name, group 3 is the trailing "/" (with optional leading whitespace) of a
# self-closing tag. Attributes, if any, are matched but not captured.
_re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.S)
 14
 15
 16class Truncator(SimpleLazyObject):
 17    """
 18    An object used to truncate text, either by characters or words.
 19    """
 20
 21    _wrapped: str  # Override parent type since we always store str
 22
 23    def __init__(self, text: Any):
 24        super().__init__(lambda: str(text))
 25
 26    def add_truncation_text(self, text: str, truncate: str | None = None) -> str:
 27        if truncate is None:
 28            truncate = "%(truncated_text)s…"
 29        if "%(truncated_text)s" in truncate:
 30            return truncate % {"truncated_text": text}
 31        # The truncation text didn't contain the %(truncated_text)s string
 32        # replacement argument so just append it to the text.
 33        if text.endswith(truncate):
 34            # But don't append the truncation text if the current text already
 35            # ends in this.
 36            return text
 37        return f"{text}{truncate}"
 38
 39    def chars(self, num: int, truncate: str | None = None, html: bool = False) -> str:
 40        """
 41        Return the text truncated to be no longer than the specified number
 42        of characters.
 43
 44        `truncate` specifies what should be used to notify that the string has
 45        been truncated, defaulting to a translatable string of an ellipsis.
 46        """
 47        self._setup()
 48        length = int(num)
 49        text = unicodedata.normalize("NFC", self._wrapped)
 50
 51        # Calculate the length to truncate to (max length - end_text length)
 52        truncate_len = length
 53        for char in self.add_truncation_text("", truncate):
 54            if not unicodedata.combining(char):
 55                truncate_len -= 1
 56                if truncate_len == 0:
 57                    break
 58        if html:
 59            return self._truncate_html(length, truncate, text, truncate_len, False)
 60        return self._text_chars(length, truncate, text, truncate_len)
 61
 62    def _text_chars(
 63        self, length: int, truncate: str | None, text: str, truncate_len: int
 64    ) -> str:
 65        """Truncate a string after a certain number of chars."""
 66        s_len = 0
 67        end_index = None
 68        for i, char in enumerate(text):
 69            if unicodedata.combining(char):
 70                # Don't consider combining characters
 71                # as adding to the string length
 72                continue
 73            s_len += 1
 74            if end_index is None and s_len > truncate_len:
 75                end_index = i
 76            if s_len > length:
 77                # Return the truncated string
 78                return self.add_truncation_text(text[: end_index or 0], truncate)
 79
 80        # Return the original string since no truncation was necessary
 81        return text
 82
 83    def words(self, num: int, truncate: str | None = None, html: bool = False) -> str:
 84        """
 85        Truncate a string after a certain number of words. `truncate` specifies
 86        what should be used to notify that the string has been truncated,
 87        defaulting to ellipsis.
 88        """
 89        self._setup()
 90        length = int(num)
 91        if html:
 92            return self._truncate_html(length, truncate, self._wrapped, length, True)
 93        return self._text_words(length, truncate)
 94
 95    def _text_words(self, length: int, truncate: str | None) -> str:
 96        """
 97        Truncate a string after a certain number of words.
 98
 99        Strip newlines in the string.
100        """
101        words = self._wrapped.split()
102        if len(words) > length:
103            words = words[:length]
104            return self.add_truncation_text(" ".join(words), truncate)
105        return " ".join(words)
106
107    def _truncate_html(
108        self,
109        length: int,
110        truncate: str | None,
111        text: str,
112        truncate_len: int,
113        words: bool,
114    ) -> str:
115        """
116        Truncate HTML to a certain number of chars (not counting tags and
117        comments), or, if words is True, then to a certain number of words.
118        Close opened tags if they were correctly closed in the given HTML.
119
120        Preserve newlines in the HTML.
121        """
122        if words and length <= 0:
123            return ""
124
125        html4_singlets = (
126            "br",
127            "col",
128            "link",
129            "base",
130            "img",
131            "param",
132            "area",
133            "hr",
134            "input",
135        )
136
137        # Count non-HTML chars/words and keep note of open tags
138        pos = 0
139        end_text_pos = 0
140        current_len = 0
141        open_tags = []
142
143        regex = _re_words if words else _re_chars
144
145        while current_len <= length:
146            m = regex.search(text, pos)
147            if not m:
148                # Checked through whole string
149                break
150            pos = m.end(0)
151            if m[1]:
152                # It's an actual non-HTML word or char
153                current_len += 1
154                if current_len == truncate_len:
155                    end_text_pos = pos
156                continue
157            # Check for tag
158            tag = _re_tag.match(m[0])
159            if not tag or current_len >= truncate_len:
160                # Don't worry about non tags or tags after our truncate point
161                continue
162            closing_tag, tagname, self_closing = tag.groups()
163            # Element names are always case-insensitive
164            tagname = tagname.lower()
165            if self_closing or tagname in html4_singlets:
166                pass
167            elif closing_tag:
168                # Check for match in open tags list
169                try:
170                    i = open_tags.index(tagname)
171                except ValueError:
172                    pass
173                else:
174                    # SGML: An end tag closes, back to the matching start tag,
175                    # all unclosed intervening start tags with omitted end tags
176                    open_tags = open_tags[i + 1 :]
177            else:
178                # Add it to the start of the open tags list
179                open_tags.insert(0, tagname)
180
181        if current_len <= length:
182            return text
183        out = text[:end_text_pos]
184        truncate_text = self.add_truncation_text("", truncate)
185        if truncate_text:
186            out += truncate_text
187        # Close any tags still open
188        for tag in open_tags:
189            out += f"</{tag}>"
190        # Return string
191        return out
192
193
@keep_lazy_text
def slugify(value: Any, allow_unicode: bool = False) -> str:
    """
    Build a URL-friendly slug from `value`.

    The value is stringified and Unicode-normalized: NFKC when
    `allow_unicode` is true, otherwise NFKD followed by dropping every
    non-ASCII character. The result is lowercased, characters other than
    word characters, whitespace, and hyphens are removed, each run of
    whitespace and/or hyphens becomes a single hyphen, and leading and
    trailing hyphens and underscores are stripped.
    """
    text = str(value)
    if not allow_unicode:
        # Decompose, then discard anything that cannot be encoded as ASCII.
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")
    else:
        text = unicodedata.normalize("NFKC", text)
    lowered = text.lower()
    cleaned = re.sub(r"[^\w\s-]", "", lowered)
    hyphenated = re.sub(r"[-\s]+", "-", cleaned)
    return hyphenated.strip("-_")
213
214
def pluralize(singular: str, plural: str, number: int) -> str:
    """Return `singular` when `number` equals 1, otherwise `plural`."""
    return singular if number == 1 else plural
220
221
def _lazy_number_unpickle(
    func: Any, resultclass: Any, number: Any, kwargs: dict[str, Any]
) -> Any:
    """
    Rebuild a lazy plural proxy from the arguments stored by ``__reduce__``.

    Kept at module level because pickle serialises functions by qualified
    name and cannot pickle functions defined inside another function.
    """
    return _lazy_number(func, resultclass, number=number, **kwargs)


def _lazy_number(
    func: Any, resultclass: Any, number: int | str | None = None, **kwargs: Any
) -> Any:
    """
    Return a lazy proxy for ``func(**kwargs, number=...)``.

    If `number` is an int it is passed straight through to `func` via
    ``lazy``. Otherwise `number` is treated as the name of the key to look
    up when the proxy is later formatted (with ``%`` or ``.format()``), and
    the singular/plural choice is deferred until then.
    """
    if isinstance(number, int):
        kwargs["number"] = number
        proxy = lazy(func, resultclass)(**kwargs)
    else:
        # Snapshot the arguments before _translate() adds "number" to them,
        # so that pickling round-trips the original call.
        original_kwargs = kwargs.copy()

        class NumberAwareString(resultclass):
            def __bool__(self) -> bool:
                return bool(kwargs["singular"])

            def _get_number_value(self, values: dict[str, Any]) -> Any:
                """Fetch the count from `values` using the `number` key."""
                try:
                    return values[number]  # ty: ignore[invalid-argument-type]
                except KeyError:
                    raise KeyError(
                        f"Your dictionary lacks key '{number}'. Please provide "
                        "it, because it is required to determine whether "
                        "string is singular or plural."
                    ) from None

            def _translate(self, number_value: int) -> str:
                """Resolve the singular/plural string for `number_value`."""
                kwargs["number"] = number_value
                return func(**kwargs)

            def format(self, *args: Any, **fmt_kwargs: Any) -> str:
                # With keyword arguments and a key name, the count comes from
                # the keyword arguments; otherwise it is the first positional
                # argument.
                number_value = (
                    self._get_number_value(fmt_kwargs)
                    if fmt_kwargs and number
                    else args[0]
                )
                return self._translate(number_value).format(*args, **fmt_kwargs)

            def __mod__(self, rhs: Any) -> str:
                if isinstance(rhs, dict) and number:
                    number_value = self._get_number_value(rhs)
                else:
                    number_value = rhs
                translated = self._translate(number_value)
                try:
                    translated %= rhs
                except TypeError:
                    # String doesn't contain a placeholder for the number.
                    pass
                return translated

        proxy = lazy(lambda **kwargs: NumberAwareString(), NumberAwareString)(
            **kwargs
        )
        # Reconstruct via the module-level helper rather than trying to
        # serialise the locally defined NumberAwareString class.
        proxy.__reduce__ = lambda: (
            _lazy_number_unpickle,
            (func, resultclass, number, original_kwargs),
        )
    return proxy


def pluralize_lazy(singular: str, plural: str, number: int | str) -> Any:
    """
    Return a lazily evaluated choice between `singular` and `plural`.

    `number` is either the count itself (int) or the name of the key that
    will carry the count when the returned object is formatted with ``%``
    or ``.format()``.
    """
    return _lazy_number(
        pluralize, str, singular=singular, plural=plural, number=number
    )
283    return lazy_number(pluralize, str, singular=singular, plural=plural, number=number)