Plain is headed towards 1.0! Subscribe for development updates →

 1from __future__ import annotations
 2
 3from urllib.parse import parse_qs, urlparse
 4
 5
 6def extract_tracking_params(url: str) -> tuple[str, str, str]:
 7    """
 8    Extract tracking parameters from a URL.
 9
10    Supports:
11    - UTM parameters (utm_source, utm_medium, utm_campaign)
12    - Simple ref parameter
13    - Auto-detection of tracking IDs (gclid, fbclid)
14
15    Args:
16        url: Full URL to parse
17
18    Returns:
19        Tuple of (source, medium, campaign) strings
20    """
21    parsed = urlparse(url)
22    params = parse_qs(parsed.query)
23
24    source = ""
25    medium = ""
26    campaign = ""
27
28    # Extract source (priority order)
29    if utm_source := params.get("utm_source", [""])[0]:
30        source = utm_source.strip().lower()
31    elif ref := params.get("ref", [""])[0]:
32        source = ref.strip().lower()
33    elif "gclid" in params:
34        source = "google"
35    elif "fbclid" in params:
36        source = "facebook"
37    elif "msclkid" in params:
38        source = "bing"
39    elif "ttclid" in params:
40        source = "tiktok"
41    elif "twclid" in params:
42        source = "twitter"
43
44    # Extract medium
45    if utm_medium := params.get("utm_medium", [""])[0]:
46        medium = utm_medium.strip().lower()
47    elif "gclid" in params:
48        medium = "cpc"
49    elif "fbclid" in params:
50        medium = "social"
51    elif "msclkid" in params:
52        medium = "cpc"
53    elif "ttclid" in params:
54        medium = "cpc"
55    elif "twclid" in params:
56        medium = "cpc"
57
58    # Extract campaign
59    if utm_campaign := params.get("utm_campaign", [""])[0]:
60        campaign = utm_campaign.strip().lower()
61
62    return source, medium, campaign