1from __future__ import annotations
  2
  3import re
  4import string
  5from typing import Any
  6
  7from plain.exceptions import ImproperlyConfigured
  8from plain.internal import internalcode
  9from plain.preflight import PreflightResult
 10from plain.runtime import settings
 11from plain.utils.regex_helper import _lazy_re_compile
 12
 13from .converters import _get_converter
 14
 15
 16@internalcode
 17class CheckURLMixin:
 18    # Expected to be set by subclasses
 19    regex: re.Pattern[str]
 20    name: str | None
 21
 22    def describe(self) -> str:
 23        """
 24        Format the URL pattern for display in warning messages.
 25        """
 26        description = f"'{self}'"
 27        if self.name:
 28            description += f" [name='{self.name}']"
 29        return description
 30
 31    def _check_pattern_startswith_slash(self) -> list[PreflightResult]:
 32        """
 33        Check that the pattern does not begin with a forward slash.
 34        """
 35        regex_pattern = self.regex.pattern
 36        if not settings.APPEND_SLASH:
 37            # Skip check as it can be useful to start a URL pattern with a slash
 38            # when APPEND_SLASH=False.
 39            return []
 40        if regex_pattern.startswith(("/", "^/", "^\\/")) and not regex_pattern.endswith(
 41            "/"
 42        ):
 43            warning = PreflightResult(
 44                fix=f"URL pattern {self.describe()} starts with unnecessary '/'. Remove the leading slash.",
 45                warning=True,
 46                id="urls.pattern_starts_with_slash",
 47            )
 48            return [warning]
 49        else:
 50            return []
 51
 52
 53class RegexPattern(CheckURLMixin):
 54    def __init__(self, regex: str, name: str | None = None, is_endpoint: bool = False):
 55        self._regex = regex
 56        self._is_endpoint = is_endpoint
 57        self.name = name
 58        self.converters: dict[str, Any] = {}
 59        self.regex = self._compile(str(regex))
 60
 61    def match(self, path: str) -> tuple[str, tuple[Any, ...], dict[str, Any]] | None:
 62        match = (
 63            self.regex.fullmatch(path)
 64            if self._is_endpoint and self.regex.pattern.endswith("$")
 65            else self.regex.search(path)
 66        )
 67        if match:
 68            # If there are any named groups, use those as kwargs, ignoring
 69            # non-named groups. Otherwise, pass all non-named arguments as
 70            # positional arguments.
 71            kwargs = match.groupdict()
 72            args = () if kwargs else match.groups()
 73            kwargs = {k: v for k, v in kwargs.items() if v is not None}
 74            return path[match.end() :], args, kwargs
 75        return None
 76
 77    def preflight(self) -> list[PreflightResult]:
 78        warnings = []
 79        warnings.extend(self._check_pattern_startswith_slash())
 80        if not self._is_endpoint:
 81            warnings.extend(self._check_include_trailing_dollar())
 82        return warnings
 83
 84    def _check_include_trailing_dollar(self) -> list[PreflightResult]:
 85        regex_pattern = self.regex.pattern
 86        if regex_pattern.endswith("$") and not regex_pattern.endswith(r"\$"):
 87            return [
 88                PreflightResult(
 89                    fix=f"Include pattern {self.describe()} ends with '$' which prevents URL inclusion. Remove the dollar sign.",
 90                    warning=True,
 91                    id="urls.include_pattern_ends_with_dollar",
 92                )
 93            ]
 94        else:
 95            return []
 96
 97    def _compile(self, regex: str) -> re.Pattern[str]:
 98        """Compile and return the given regular expression."""
 99        try:
100            return re.compile(regex)
101        except re.error as e:
102            raise ImproperlyConfigured(
103                f'"{regex}" is not a valid regular expression: {e}'
104            ) from e
105
106    def __str__(self) -> str:
107        return str(self._regex)
108
109
# Matches one angle-bracket path parameter inside a route: either
# "<parameter>" or "<converter:parameter>". The optional "converter" group
# stops at the first ':' or '>'; "parameter" captures everything up to the
# closing '>'.
# NOTE(review): _lazy_re_compile presumably defers compilation until first
# use — confirm in plain.utils.regex_helper.
_PATH_PARAMETER_COMPONENT_RE = _lazy_re_compile(
    r"<(?:(?P<converter>[^>:]+):)?(?P<parameter>[^>]+)>"
)
113
114
def _route_to_regex(
    route: str, is_endpoint: bool = False
) -> tuple[str, dict[str, Any]]:
    """
    Convert a path pattern into a regular expression. Return the regular
    expression and a dictionary mapping the capture names to the converters.
    For example, 'foo/<int:id>' returns '^foo/(?P<id>[0-9]+)'
    and {'id': <plain.urls.converters.IntConverter>}.

    Literal text between parameters is passed through ``re.escape``. When
    ``is_endpoint`` is true, ``\\Z`` is appended so the expression must match
    through the end of the path.

    Raise ImproperlyConfigured when a ``<...>`` component contains
    whitespace, when a parameter name is not a valid Python identifier, when
    the same parameter name is used more than once, or when the converter
    name is unknown.
    """
    original_route = route
    parts = ["^"]
    converters: dict[str, Any] = {}
    while True:
        match = _PATH_PARAMETER_COMPONENT_RE.search(route)
        if not match:
            # No more parameters: whatever is left is literal text.
            parts.append(re.escape(route))
            break
        elif not set(match.group()).isdisjoint(string.whitespace):
            raise ImproperlyConfigured(
                f"URL route '{original_route}' cannot contain whitespace in angle brackets "
                "<…>."
            )
        # Literal text before the parameter, then continue after it.
        parts.append(re.escape(route[: match.start()]))
        route = route[match.end() :]
        parameter = match["parameter"]
        if not parameter.isidentifier():
            raise ImproperlyConfigured(
                f"URL route '{original_route}' uses parameter name {parameter!r} which isn't a valid "
                "Python identifier."
            )
        if parameter in converters:
            # A repeated name would silently replace the earlier converter and
            # produce a regex with a redefined group, which re.compile rejects
            # with a far less helpful error.
            raise ImproperlyConfigured(
                f"URL route {original_route!r} uses parameter name {parameter!r} more than once."
            )
        raw_converter = match["converter"]
        if raw_converter is None:
            # If a converter isn't specified, the default is `str`.
            raw_converter = "str"
        try:
            converter = _get_converter(raw_converter)
        except KeyError as e:
            raise ImproperlyConfigured(
                f"URL route {original_route!r} uses invalid converter {raw_converter!r}."
            ) from e
        converters[parameter] = converter
        parts.append("(?P<" + parameter + ">" + converter.regex + ")")
    if is_endpoint:
        parts.append(r"\Z")
    return "".join(parts), converters
160
161
class RoutePattern(CheckURLMixin):
    """
    URL pattern described by a route string such as ``'foo/<int:id>'``.

    The route is translated to a regular expression by ``_route_to_regex``;
    captured values are passed through the converter registered for each
    parameter before being returned from ``match``.
    """

    def __init__(self, route: str, name: str | None = None, is_endpoint: bool = False):
        self._route = route
        self._is_endpoint = is_endpoint
        self.name = name
        # Parse the route once: the regex source and the converter mapping
        # both come out of the same pass.
        regex_source, self.converters = _route_to_regex(str(route), is_endpoint)
        self.regex = re.compile(regex_source)

    def match(self, path: str) -> tuple[str, tuple[()], dict[str, Any]] | None:
        """
        Match ``path`` against the route.

        Return ``(remaining_path, (), kwargs)`` with converted parameter
        values, or None when the path does not match or a converter rejects
        a captured value by raising ValueError.
        """
        match = self.regex.search(path)
        if match is None:
            return None
        # RoutePattern doesn't allow non-named groups so args are ignored.
        kwargs: dict[str, Any] = {}
        for key, value in match.groupdict().items():
            converter = self.converters[key]
            try:
                kwargs[key] = converter.to_python(value)
            except ValueError:
                return None
        return path[match.end() :], (), kwargs

    def preflight(self) -> list[PreflightResult]:
        """
        Collect the warnings that apply to this route, including a hint when
        the route looks like a regular expression.
        """
        warnings = self._check_pattern_startswith_slash()
        route = self._route
        if "(?P<" in route or route.startswith("^") or route.endswith("$"):
            warnings.append(
                PreflightResult(
                    fix=f"Your URL pattern {self.describe()} has a route that contains '(?P<', begins "
                    "with a '^', or ends with a '$'. This was likely an oversight "
                    "when migrating to plain.urls.path().",
                    warning=True,
                    id="urls.path_migration_warning",
                )
            )
        return warnings

    def _compile(self, route: str) -> re.Pattern[str]:
        """Translate ``route`` to a regular expression and compile it."""
        return re.compile(_route_to_regex(route, self._is_endpoint)[0])

    def __str__(self) -> str:
        """Return the route as it was originally given."""
        return str(self._route)
204
205
class URLPattern:
    """
    Pairs a pattern object with the view it routes to.

    The ``name`` argument is stored on the instance, while the name check
    and ``resolve`` read the name from ``self.pattern``.
    """

    def __init__(
        self,
        *,
        pattern: RegexPattern | RoutePattern,
        view: Any,
        name: str | None = None,
    ):
        self.pattern = pattern
        self.view = view
        self.name = name

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} {self.pattern.describe()}>"

    def preflight(self) -> list[PreflightResult]:
        """Return the name warning (if any) followed by the pattern's own warnings."""
        return [*self._check_pattern_name(), *self.pattern.preflight()]

    def _check_pattern_name(self) -> list[PreflightResult]:
        """
        Warn when the pattern's name contains a colon.
        """
        pattern_name = self.pattern.name
        if pattern_name is None or ":" not in pattern_name:
            return []
        return [
            PreflightResult(
                fix=f"Your URL pattern {self.pattern.describe()} has a name including a ':'. Remove the colon, to "
                "avoid ambiguous namespace references.",
                warning=True,
                id="urls.pattern_name_contains_colon",
            )
        ]

    def resolve(self, path: str) -> Any:
        """
        Return a ResolverMatch for ``path`` when the pattern matches it,
        otherwise None.
        """
        matched = self.pattern.match(path)
        if not matched:
            return None
        # The unmatched remainder of the path is not used here.
        _remaining, args, captured_kwargs = matched
        from .resolvers import ResolverMatch

        return ResolverMatch(
            view=self.view,
            args=args,
            kwargs=captured_kwargs,
            url_name=self.pattern.name,
            route=str(self.pattern),
        )