Plain is headed towards 1.0! Subscribe for development updates →

  1from __future__ import annotations
  2
  3from typing import TYPE_CHECKING
  4from urllib.parse import urlparse
  5
  6import requests
  7
  8from ..results import AuditResult, CheckResult
  9from .base import Audit
 10
 11if TYPE_CHECKING:
 12    from ..scanner import Scanner
 13
 14
 15class RedirectsAudit(Audit):
 16    """Redirect hygiene checks."""
 17
 18    name = "Redirects"
 19    slug = "redirects"
 20    description = "Validates redirect configuration including HTTP to HTTPS upgrades, redirect chains, and URL canonicalization."
 21
 22    def check(self, scanner: Scanner) -> AuditResult:
 23        """Check redirect configuration and hygiene."""
 24        response = scanner.fetch()
 25
 26        # Check if any redirects occurred
 27        if not response.history:
 28            # No redirects - but we still run checks on the final URL
 29            checks = [
 30                self._check_final_url_https(scanner.url, response.url),
 31                self._check_trailing_slash_redirect(scanner.url, response),
 32            ]
 33
 34            return AuditResult(
 35                name=self.name,
 36                detected=True,
 37                required=self.required,
 38                checks=checks,
 39                description=self.description,
 40            )
 41
 42        # Redirects occurred - run all checks
 43        checks = [
 44            self._check_http_to_https(scanner.url, response),
 45            self._check_redirect_chain_length(response),
 46            self._check_final_url_https(scanner.url, response.url),
 47            self._check_cross_origin_redirects(scanner.url, response),
 48            self._check_status_codes(response),
 49            self._check_trailing_slash_redirect(scanner.url, response),
 50        ]
 51
 52        return AuditResult(
 53            name=self.name,
 54            detected=True,
 55            required=self.required,
 56            checks=checks,
 57            description=self.description,
 58        )
 59
 60    def _check_http_to_https(
 61        self, original_url: str, response: requests.Response
 62    ) -> CheckResult:
 63        """Check if HTTP redirects to HTTPS."""
 64        original_parsed = urlparse(original_url)
 65
 66        # Only check if original URL was HTTP
 67        if original_parsed.scheme != "http":
 68            return CheckResult(
 69                name="http-to-https",
 70                passed=True,
 71                message="Original URL is already HTTPS",
 72            )
 73
 74        # Check if we ended up on HTTPS
 75        final_parsed = urlparse(response.url)
 76        if final_parsed.scheme == "https":
 77            return CheckResult(
 78                name="http-to-https",
 79                passed=True,
 80                message="HTTP successfully redirects to HTTPS",
 81            )
 82
 83        return CheckResult(
 84            name="http-to-https",
 85            passed=False,
 86            message="HTTP does not redirect to HTTPS",
 87        )
 88
 89    def _check_redirect_chain_length(self, response: requests.Response) -> CheckResult:
 90        """Check that redirect chain is not too long."""
 91        redirect_count = len(response.history)
 92
 93        # More than 3 redirects is generally excessive
 94        max_redirects = 3
 95
 96        if redirect_count > max_redirects:
 97            return CheckResult(
 98                name="redirect-chain",
 99                passed=False,
100                message=f"Redirect chain has {redirect_count} redirects (recommended maximum: {max_redirects})",
101            )
102
103        return CheckResult(
104            name="redirect-chain",
105            passed=True,
106            message=f"Redirect chain has {redirect_count} redirect(s)",
107        )
108
109    def _check_final_url_https(self, original_url: str, final_url: str) -> CheckResult:
110        """Check that final URL is HTTPS."""
111        final_parsed = urlparse(final_url)
112
113        if final_parsed.scheme == "https":
114            return CheckResult(
115                name="final-url-https",
116                passed=True,
117                message="Final URL uses HTTPS",
118            )
119
120        # Only fail if the original URL was HTTP (expecting an upgrade)
121        original_parsed = urlparse(original_url)
122        if original_parsed.scheme == "http":
123            return CheckResult(
124                name="final-url-https",
125                passed=False,
126                message=f"Final URL uses {final_parsed.scheme} instead of HTTPS",
127            )
128
129        # Original was HTTPS, final is not HTTPS - this is bad (downgrade)
130        return CheckResult(
131            name="final-url-https",
132            passed=False,
133            message=f"HTTPS was downgraded to {final_parsed.scheme}",
134        )
135
136    def _check_cross_origin_redirects(
137        self, original_url: str, response: requests.Response
138    ) -> CheckResult:
139        """
140        Check for problematic cross-origin redirects.
141
142        Mozilla Observatory's approach: Only fail if HTTP redirects to a different host on HTTPS.
143        This prevents HSTS from protecting the initial HTTP request.
144
145        HTTPS->HTTPS redirects to different hosts (like www canonicalization) are acceptable.
146        """
147        original_parsed = urlparse(original_url)
148
149        # Only check if we started with HTTP
150        if original_parsed.scheme != "http":
151            return CheckResult(
152                name="cross-origin-redirects",
153                passed=True,
154                message="Original URL is already HTTPS (cross-origin redirects acceptable)",
155            )
156
157        # Check if first redirect is to HTTPS on a different host
158        if response.history:
159            first_redirect = response.history[0]
160            first_redirect_parsed = urlparse(first_redirect.url)
161
162            # If first redirect is HTTP->HTTPS and changes host, this prevents HSTS
163            if (
164                first_redirect_parsed.scheme == "https"
165                and first_redirect_parsed.netloc != original_parsed.netloc
166            ):
167                return CheckResult(
168                    name="cross-origin-redirects",
169                    passed=False,
170                    message=f"HTTP to HTTPS redirect changes host to {first_redirect_parsed.netloc} (prevents HSTS on initial request)",
171                )
172
173        # Check final URL
174        final_parsed = urlparse(response.url)
175        if final_parsed.netloc != original_parsed.netloc:
176            # Cross-origin but passed the checks above
177            return CheckResult(
178                name="cross-origin-redirects",
179                passed=True,
180                message=f"Cross-origin redirect to {final_parsed.netloc} is acceptable",
181            )
182
183        return CheckResult(
184            name="cross-origin-redirects",
185            passed=True,
186            message="All redirects stay on the same domain",
187        )
188
189    def _check_status_codes(self, response: requests.Response) -> CheckResult:
190        """Check that redirects use appropriate status codes."""
191        # Valid redirect status codes: 301, 302, 303, 307, 308
192        # Preferred: 301 (permanent), 302/307 (temporary), 308 (permanent, preserves method)
193        valid_codes = {301, 302, 303, 307, 308}
194        invalid_redirects = []
195
196        for redirect_response in response.history:
197            status_code = redirect_response.status_code
198            if status_code not in valid_codes:
199                invalid_redirects.append(f"{redirect_response.url} ({status_code})")
200
201        if invalid_redirects:
202            return CheckResult(
203                name="redirect-status-codes",
204                passed=False,
205                message=f"Invalid redirect status codes found: {', '.join(invalid_redirects)}",
206            )
207
208        return CheckResult(
209            name="redirect-status-codes",
210            passed=True,
211            message=f"All {len(response.history)} redirect(s) use valid status codes",
212        )
213
214    def _check_trailing_slash_redirect(
215        self, original_url: str, response: requests.Response
216    ) -> CheckResult:
217        """Check if redirect is just adding/removing a trailing slash."""
218        if not response.history:
219            return CheckResult(
220                name="trailing-slash-redirect",
221                passed=True,
222                message="No redirects occurred",
223            )
224
225        original_parsed = urlparse(original_url)
226        final_parsed = urlparse(response.url)
227
228        # Check if everything is the same except trailing slash
229        if (
230            original_parsed.scheme == final_parsed.scheme
231            and original_parsed.netloc == final_parsed.netloc
232            and original_parsed.params == final_parsed.params
233            and original_parsed.query == final_parsed.query
234            and original_parsed.fragment == final_parsed.fragment
235        ):
236            # Check if paths differ only by trailing slash
237            orig_path = original_parsed.path
238            final_path = final_parsed.path
239
240            if orig_path.rstrip("/") == final_path.rstrip("/"):
241                # It's a trailing slash redirect
242                if orig_path.endswith("/") and not final_path.endswith("/"):
243                    return CheckResult(
244                        name="trailing-slash-redirect",
245                        passed=False,
246                        message="Unnecessary redirect removes trailing slash",
247                    )
248                elif not orig_path.endswith("/") and final_path.endswith("/"):
249                    return CheckResult(
250                        name="trailing-slash-redirect",
251                        passed=False,
252                        message="Unnecessary redirect adds trailing slash",
253                    )
254
255        return CheckResult(
256            name="trailing-slash-redirect",
257            passed=True,
258            message="No trailing slash redirects detected",
259        )