1from __future__ import annotations
2
3from typing import TYPE_CHECKING
4from urllib.parse import urlparse
5
6import requests
7
8from ..results import AuditResult, CheckResult
9from .base import Audit
10
11if TYPE_CHECKING:
12 from ..scanner import Scanner
13
14
class RedirectsAudit(Audit):
    """Redirect hygiene checks.

    Inspects the redirect chain recorded on the response returned by
    ``scanner.fetch()`` (annotated throughout as a ``requests.Response``)
    and reports one ``CheckResult`` per rule: HTTP-to-HTTPS upgrade, chain
    length, final scheme, off-host upgrade redirects, redirect status
    codes, and trailing-slash-only redirects.
    """

    name = "Redirects"
    slug = "redirects"
    description = "Validates redirect configuration including HTTP to HTTPS upgrades, redirect chains, and URL canonicalization."

    def check(self, scanner: Scanner) -> AuditResult:
        """Check redirect configuration and hygiene.

        Args:
            scanner: Provides the scanned ``url`` and a ``fetch()`` method
                returning the final response (with ``history`` and ``url``).

        Returns:
            An ``AuditResult`` with ``detected=True``. When at least one
            redirect was followed, all six checks are included; otherwise
            only the final-URL and trailing-slash checks are run.
        """
        response = scanner.fetch()

        if response.history:
            # Redirects occurred - run all checks
            checks = [
                self._check_http_to_https(scanner.url, response),
                self._check_redirect_chain_length(response),
                self._check_final_url_https(scanner.url, response.url),
                self._check_cross_origin_redirects(scanner.url, response),
                self._check_status_codes(response),
                self._check_trailing_slash_redirect(scanner.url, response),
            ]
        else:
            # No redirects - but we still run checks on the final URL
            checks = [
                self._check_final_url_https(scanner.url, response.url),
                self._check_trailing_slash_redirect(scanner.url, response),
            ]

        return AuditResult(
            name=self.name,
            detected=True,
            required=self.required,
            checks=checks,
            description=self.description,
        )

    def _check_http_to_https(
        self, original_url: str, response: requests.Response
    ) -> CheckResult:
        """Check if HTTP redirects to HTTPS.

        Passes when the original URL is not plain ``http`` (nothing to
        upgrade) or when the final response URL uses ``https``.
        """
        # Only check if original URL was HTTP
        if urlparse(original_url).scheme != "http":
            return CheckResult(
                name="http-to-https",
                passed=True,
                message="Original URL is already HTTPS",
            )

        # Check if we ended up on HTTPS
        if urlparse(response.url).scheme == "https":
            return CheckResult(
                name="http-to-https",
                passed=True,
                message="HTTP successfully redirects to HTTPS",
            )

        return CheckResult(
            name="http-to-https",
            passed=False,
            message="HTTP does not redirect to HTTPS",
        )

    def _check_redirect_chain_length(self, response: requests.Response) -> CheckResult:
        """Check that redirect chain is not too long.

        The chain length is the number of entries in ``response.history``.
        """
        redirect_count = len(response.history)

        # More than 3 redirects is generally excessive
        max_redirects = 3

        if redirect_count > max_redirects:
            return CheckResult(
                name="redirect-chain",
                passed=False,
                message=f"Redirect chain has {redirect_count} redirects (recommended maximum: {max_redirects})",
            )

        return CheckResult(
            name="redirect-chain",
            passed=True,
            message=f"Redirect chain has {redirect_count} redirect(s)",
        )

    def _check_final_url_https(self, original_url: str, final_url: str) -> CheckResult:
        """Check that final URL is HTTPS.

        Any non-HTTPS final URL fails; the message distinguishes a missing
        upgrade (original was ``http``) from a downgrade (original was not).
        """
        final_scheme = urlparse(final_url).scheme

        if final_scheme == "https":
            return CheckResult(
                name="final-url-https",
                passed=True,
                message="Final URL uses HTTPS",
            )

        # Original was HTTP and was never upgraded
        if urlparse(original_url).scheme == "http":
            return CheckResult(
                name="final-url-https",
                passed=False,
                message=f"Final URL uses {final_scheme} instead of HTTPS",
            )

        # Original was HTTPS, final is not HTTPS - this is bad (downgrade)
        return CheckResult(
            name="final-url-https",
            passed=False,
            message=f"HTTPS was downgraded to {final_scheme}",
        )

    def _check_cross_origin_redirects(
        self, original_url: str, response: requests.Response
    ) -> CheckResult:
        """
        Check for problematic cross-origin redirects.

        Mozilla Observatory's approach: Only fail if HTTP redirects to a different host on HTTPS.
        This prevents HSTS from protecting the initial HTTP request.

        HTTPS->HTTPS redirects to different hosts (like www canonicalization) are acceptable.

        Hosts are compared by ``hostname`` (port and letter case ignored);
        messages still report the full ``netloc``.
        """
        original_parsed = urlparse(original_url)

        # Only check if we started with HTTP
        if original_parsed.scheme != "http":
            return CheckResult(
                name="cross-origin-redirects",
                passed=True,
                message="Original URL is already HTTPS (cross-origin redirects acceptable)",
            )

        hops = response.history
        if hops:
            # Each history entry is the *response* to a hop, so hops[0].url is
            # the URL that was requested, not where it redirected to. The
            # target of the first redirect is the URL of the next hop, or the
            # final response URL when there was only a single redirect.
            first_target_url = hops[1].url if len(hops) > 1 else response.url
            first_target = urlparse(first_target_url)

            # If first redirect is HTTP->HTTPS and changes host, this prevents HSTS
            if (
                first_target.scheme == "https"
                and first_target.hostname != original_parsed.hostname
            ):
                return CheckResult(
                    name="cross-origin-redirects",
                    passed=False,
                    message=f"HTTP to HTTPS redirect changes host to {first_target.netloc} (prevents HSTS on initial request)",
                )

        # Check final URL
        final_parsed = urlparse(response.url)
        if final_parsed.hostname != original_parsed.hostname:
            # Cross-origin but passed the checks above
            return CheckResult(
                name="cross-origin-redirects",
                passed=True,
                message=f"Cross-origin redirect to {final_parsed.netloc} is acceptable",
            )

        return CheckResult(
            name="cross-origin-redirects",
            passed=True,
            message="All redirects stay on the same domain",
        )

    def _check_status_codes(self, response: requests.Response) -> CheckResult:
        """Check that redirects use appropriate status codes.

        Every entry in ``response.history`` must carry one of
        301, 302, 303, 307 or 308; offenders are listed in the message.
        """
        # Valid redirect status codes: 301, 302, 303, 307, 308
        # Preferred: 301 (permanent), 302/307 (temporary), 308 (permanent, preserves method)
        valid_codes = {301, 302, 303, 307, 308}

        invalid_redirects = [
            f"{hop.url} ({hop.status_code})"
            for hop in response.history
            if hop.status_code not in valid_codes
        ]

        if invalid_redirects:
            return CheckResult(
                name="redirect-status-codes",
                passed=False,
                message=f"Invalid redirect status codes found: {', '.join(invalid_redirects)}",
            )

        return CheckResult(
            name="redirect-status-codes",
            passed=True,
            message=f"All {len(response.history)} redirect(s) use valid status codes",
        )

    def _check_trailing_slash_redirect(
        self, original_url: str, response: requests.Response
    ) -> CheckResult:
        """Check if redirect is just adding/removing a trailing slash.

        Fails only when the original and final URLs are identical in every
        component except that one path ends with ``/`` and the other does
        not. An empty path is treated as the root path ``/``.
        """
        if not response.history:
            return CheckResult(
                name="trailing-slash-redirect",
                passed=True,
                message="No redirects occurred",
            )

        original_parsed = urlparse(original_url)
        final_parsed = urlparse(response.url)

        # Check if everything is the same except trailing slash
        same_apart_from_path = (
            original_parsed.scheme == final_parsed.scheme
            and original_parsed.netloc == final_parsed.netloc
            and original_parsed.params == final_parsed.params
            and original_parsed.query == final_parsed.query
            and original_parsed.fragment == final_parsed.fragment
        )

        if same_apart_from_path:
            # "" and "/" name the same resource, so normalise the empty path
            # to avoid reporting "https://host" -> "https://host/" as an
            # added trailing slash.
            orig_path = original_parsed.path or "/"
            final_path = final_parsed.path or "/"

            # Check if paths differ only by trailing slash
            if orig_path.rstrip("/") == final_path.rstrip("/"):
                orig_has_slash = orig_path.endswith("/")
                final_has_slash = final_path.endswith("/")

                if orig_has_slash and not final_has_slash:
                    return CheckResult(
                        name="trailing-slash-redirect",
                        passed=False,
                        message="Unnecessary redirect removes trailing slash",
                    )
                if not orig_has_slash and final_has_slash:
                    return CheckResult(
                        name="trailing-slash-redirect",
                        passed=False,
                        message="Unnecessary redirect adds trailing slash",
                    )

        return CheckResult(
            name="trailing-slash-redirect",
            passed=True,
            message="No trailing slash redirects detected",
        )