1import re
2import string
3
4from plain.exceptions import ImproperlyConfigured
5from plain.internal import internalcode
6from plain.preflight import Warning
7from plain.runtime import settings
8from plain.utils.regex_helper import _lazy_re_compile
9
10from .converters import get_converter
11
12
@internalcode
class CheckURLMixin:
    """
    Preflight helpers shared by the URL pattern classes.

    The methods here read ``self.name`` and ``self.regex`` and format the
    instance with ``str()``, so the host class has to supply all three.
    """

    def describe(self):
        """
        Build the label used for this pattern inside warning messages: the
        quoted string form, followed by the name when one is set.
        """
        label = f"'{self}'"
        if not self.name:
            return label
        return f"{label} [name='{self.name}']"

    def _check_pattern_startswith_slash(self):
        """
        Return a list with warning urls.W002 when the compiled pattern begins
        with a forward slash and does not end with one; otherwise return an
        empty list.
        """
        pattern_text = self.regex.pattern
        if not settings.APPEND_SLASH:
            # Skip check as it can be useful to start a URL pattern with a
            # slash when APPEND_SLASH=False.
            return []

        has_leading_slash = pattern_text.startswith(("/", "^/", "^\\/"))
        if not has_leading_slash or pattern_text.endswith("/"):
            return []

        message = (
            f"Your URL pattern {self.describe()} has a route beginning with a '/'. Remove this "
            "slash as it is unnecessary. If this pattern is targeted in an "
            "include(), ensure the include() pattern has a trailing '/'."
        )
        return [Warning(message, id="urls.W002")]
45
46
class RegexPattern(CheckURLMixin):
    """
    URL pattern driven by a caller-supplied regular expression.

    The expression is compiled once at construction time; ``converters`` is
    always an empty dict for this pattern type.
    """

    def __init__(self, regex, name=None, is_endpoint=False):
        self._regex = regex
        self._is_endpoint = is_endpoint
        self.name = name
        self.converters = {}
        self.regex = self._compile(str(regex))

    def match(self, path):
        """
        Try to match ``path``.

        Return ``(remaining_path, args, kwargs)`` on success or ``None`` when
        the expression does not match. Endpoint patterns whose expression ends
        with "$" must match the whole path; everything else uses a search.
        """
        needs_full_match = self._is_endpoint and self.regex.pattern.endswith("$")
        if needs_full_match:
            found = self.regex.fullmatch(path)
        else:
            found = self.regex.search(path)
        if not found:
            return None

        # Named groups win: when any exist they become kwargs and unnamed
        # groups are dropped. Only a pattern without named groups yields
        # positional args.
        named = found.groupdict()
        positional = found.groups() if not named else ()
        # Named groups that did not participate in the match are omitted.
        captured = {key: value for key, value in named.items() if value is not None}
        return path[found.end() :], positional, captured

    def check(self):
        """
        Collect preflight warnings for this pattern. The trailing-dollar check
        only applies to non-endpoint patterns.
        """
        found = [*self._check_pattern_startswith_slash()]
        if not self._is_endpoint:
            found += self._check_include_trailing_dollar()
        return found

    def _check_include_trailing_dollar(self):
        """
        Return a list with warning urls.W001 when the expression ends with an
        unescaped "$"; otherwise return an empty list.
        """
        pattern_text = self.regex.pattern
        ends_with_bare_dollar = pattern_text.endswith("$") and not pattern_text.endswith(
            r"\$"
        )
        if not ends_with_bare_dollar:
            return []
        message = (
            f"Your URL pattern {self.describe()} uses include with a route ending with a '$'. "
            "Remove the dollar from the route to avoid problems including "
            "URLs."
        )
        return [Warning(message, id="urls.W001")]

    def _compile(self, regex):
        """
        Compile ``regex`` and return the pattern object, raising
        ImproperlyConfigured when the expression is invalid.
        """
        try:
            compiled = re.compile(regex)
        except re.error as exc:
            raise ImproperlyConfigured(
                f'"{regex}" is not a valid regular expression: {exc}'
            ) from exc
        return compiled

    def __str__(self):
        return str(self._regex)
103
104
# Matches a single angle-bracket route parameter such as "<name>" or
# "<converter:name>". The "converter" group is optional and is None when no
# "converter:" prefix is present; the "parameter" group holds the rest of the
# text up to the closing ">".
_PATH_PARAMETER_COMPONENT_RE = _lazy_re_compile(
    r"<(?:(?P<converter>[^>:]+):)?(?P<parameter>[^>]+)>"
)
108
109
def _route_to_regex(route, is_endpoint=False):
    """
    Convert a path pattern into a regular expression. Return the regular
    expression and a dictionary mapping the capture names to the converters.
    For example, 'foo/<int:pk>' returns '^foo/(?P<pk>[0-9]+)'
    and {'pk': <plain.urls.converters.IntConverter>}.

    Literal text between parameters is passed through re.escape(). When
    ``is_endpoint`` is true, an end-of-string anchor is appended so the
    expression cannot match a longer path.

    Raise ImproperlyConfigured when an angle-bracket component contains
    whitespace, when a parameter name is not a valid Python identifier, when
    the same parameter name is used more than once, or when the converter
    name is unknown to get_converter().
    """
    original_route = route
    parts = ["^"]
    converters = {}
    while True:
        match = _PATH_PARAMETER_COMPONENT_RE.search(route)
        if not match:
            # No more parameters: whatever is left is literal text.
            parts.append(re.escape(route))
            break
        elif not set(match.group()).isdisjoint(string.whitespace):
            raise ImproperlyConfigured(
                f"URL route '{original_route}' cannot contain whitespace in angle brackets "
                "<…>."
            )
        # Literal text preceding the parameter, then continue after it.
        parts.append(re.escape(route[: match.start()]))
        route = route[match.end() :]
        parameter = match["parameter"]
        if not parameter.isidentifier():
            raise ImproperlyConfigured(
                f"URL route '{original_route}' uses parameter name {parameter!r} which isn't a valid "
                "Python identifier."
            )
        if parameter in converters:
            # A repeated name would overwrite the earlier converter and emit
            # two groups with the same name, which the re module refuses to
            # compile. Report it here as a configuration error instead.
            raise ImproperlyConfigured(
                f"URL route {original_route!r} uses parameter name {parameter!r} more than once."
            )
        raw_converter = match["converter"]
        if raw_converter is None:
            # If a converter isn't specified, the default is `str`.
            raw_converter = "str"
        try:
            converter = get_converter(raw_converter)
        except KeyError as e:
            raise ImproperlyConfigured(
                f"URL route {original_route!r} uses invalid converter {raw_converter!r}."
            ) from e
        converters[parameter] = converter
        parts.append("(?P<" + parameter + ">" + converter.regex + ")")
    if is_endpoint:
        parts.append(r"\Z")
    return "".join(parts), converters
153
154
class RoutePattern(CheckURLMixin):
    """
    URL pattern written in the angle-bracket route syntax
    (e.g. ``"articles/<int:pk>"``).

    The route is translated by ``_route_to_regex`` into a compiled expression
    (``regex``) plus a mapping of parameter name to converter
    (``converters``).
    """

    def __init__(self, route, name=None, is_endpoint=False):
        self._route = route
        self._is_endpoint = is_endpoint
        self.name = name
        self.converters = _route_to_regex(str(route), is_endpoint)[1]
        self.regex = self._compile(str(route))

    def match(self, path):
        """
        Try to match ``path``.

        Return ``(remaining_path, (), kwargs)`` with every captured value run
        through its converter's ``to_python``. Return ``None`` when the
        expression does not match or a converter raises ValueError.
        """
        match = self.regex.search(path)
        if not match:
            return None
        # RoutePattern doesn't allow non-named groups so args are ignored.
        kwargs = {}
        for key, value in match.groupdict().items():
            converter = self.converters[key]
            try:
                kwargs[key] = converter.to_python(value)
            except ValueError:
                return None
        return path[match.end() :], (), kwargs

    def check(self):
        """
        Collect preflight warnings: the leading-slash check plus a warning
        when the route looks like a leftover regular expression.
        """
        warnings = self._check_pattern_startswith_slash()
        # Normalise to text first; __init__ and __str__ also go through str(),
        # so the stored route is not guaranteed to be a plain str.
        route = str(self._route)
        if "(?P<" in route or route.startswith("^") or route.endswith("$"):
            warnings.append(
                Warning(
                    f"Your URL pattern {self.describe()} has a route that contains '(?P<', begins "
                    "with a '^', or ends with a '$'. This was likely an oversight "
                    "when migrating to plain.urls.path().",
                    id="2_0.W001",
                )
            )
        return warnings

    def _compile(self, route):
        """
        Translate ``route`` into a regular expression and compile it.

        Raise ImproperlyConfigured when the generated expression is rejected
        by the re module, matching the error type RegexPattern._compile uses.
        """
        pattern = _route_to_regex(route, self._is_endpoint)[0]
        try:
            return re.compile(pattern)
        except re.error as e:
            raise ImproperlyConfigured(
                f"URL route {route!r} could not be compiled to a regular expression: {e}"
            ) from e

    def __str__(self):
        return str(self._route)
196
197
class URLPattern:
    """
    Binds a pattern object to the view that should handle matching paths.

    ``pattern`` must offer ``name``, ``describe()``, ``check()`` and
    ``match(path)``; those are the only members this class touches.
    """

    def __init__(self, *, pattern, view, name=None):
        self.pattern = pattern
        self.view = view
        self.name = name

    def __repr__(self):
        return f"<{type(self).__name__} {self.pattern.describe()}>"

    def check(self):
        """
        Return this entry's own name warnings followed by the warnings
        reported by the wrapped pattern.
        """
        return [*self._check_pattern_name(), *self.pattern.check()]

    def _check_pattern_name(self):
        """
        Return a list with warning urls.W003 when the pattern name contains a
        colon; otherwise return an empty list.
        """
        pattern_name = self.pattern.name
        if pattern_name is None or ":" not in pattern_name:
            return []
        message = (
            f"Your URL pattern {self.pattern.describe()} has a name including a ':'. Remove the colon, to "
            "avoid ambiguous namespace references."
        )
        return [Warning(message, id="urls.W003")]

    def resolve(self, path):
        """
        Match ``path`` against the pattern and return a ResolverMatch for the
        bound view, or ``None`` when the pattern does not match.
        """
        result = self.pattern.match(path)
        if not result:
            return None

        # The unmatched remainder of the path is not used here.
        _remainder, args, captured_kwargs = result

        # Imported at call time rather than module level -- presumably to
        # avoid a circular import with .resolvers.
        from .resolvers import ResolverMatch

        return ResolverMatch(
            view=self.view,
            args=args,
            kwargs=captured_kwargs,
            url_name=self.pattern.name,
            route=str(self.pattern),
        )