1from __future__ import annotations
2
3from urllib.parse import parse_qs, urlparse
4
5
# Click-ID query parameters and the (source, medium) each one implies.
# Order matters: when several click IDs are present, the first match wins.
_CLICK_ID_ATTRIBUTION = (
    ("gclid", "google", "cpc"),
    ("fbclid", "facebook", "social"),
    ("msclkid", "bing", "cpc"),
    ("ttclid", "tiktok", "cpc"),
    ("twclid", "twitter", "cpc"),
)


def extract_tracking_params(url: str) -> tuple[str, str, str]:
    """
    Extract tracking parameters from a URL.

    Supports:
    - UTM parameters (utm_source, utm_medium, utm_campaign)
    - Simple ref parameter (used as the source when utm_source is absent)
    - Auto-detection of click IDs (gclid, fbclid, msclkid, ttclid, twclid)

    Resolution order:
    - source: utm_source, then ref, then the source implied by a click ID
    - medium: utm_medium, then the medium implied by a click ID
    - campaign: utm_campaign only

    Explicit values are stripped of surrounding whitespace and lower-cased.
    A value that is empty after stripping is treated as missing, so the
    next fallback in the chain is still consulted. Only the first
    occurrence of a repeated parameter is considered.

    Args:
    url: Full URL to parse

    Returns:
    Tuple of (source, medium, campaign) strings; each element is "" when
    nothing could be determined for it.
    """
    # parse_qs is called with its defaults, so parameters with a blank
    # value (e.g. "gclid=") are dropped and never count as present.
    params = parse_qs(urlparse(url).query)

    def first_value(name: str) -> str:
        """Return the normalized first value of *name*, or "" if absent."""
        values = params.get(name)
        # Normalize BEFORE the caller tests truthiness: a whitespace-only
        # value must not shadow the lower-priority fallbacks.
        return values[0].strip().lower() if values else ""

    # Source and medium implied by the highest-priority click ID present.
    click_source, click_medium = next(
        (
            (implied_source, implied_medium)
            for key, implied_source, implied_medium in _CLICK_ID_ATTRIBUTION
            if key in params
        ),
        ("", ""),
    )

    source = first_value("utm_source") or first_value("ref") or click_source
    medium = first_value("utm_medium") or click_medium
    campaign = first_value("utm_campaign")

    return source, medium, campaign