Plain is headed towards 1.0! Subscribe for development updates →

   1"""
   2Create SQL statements for QuerySets.
   3
   4The code in here encapsulates all of the SQL construction so that QuerySets
   5themselves do not have to (and could be backed by things other than SQL
   6databases). The abstraction barrier only works one way: this module has to know
   7all about the internals of models in order to get the information it needs.
   8"""
   9
  10import copy
  11import difflib
  12import functools
  13import sys
  14from collections import Counter, namedtuple
  15from collections.abc import Iterator, Mapping
  16from functools import cached_property
  17from itertools import chain, count, product
  18from string import ascii_uppercase
  19
  20from plain.exceptions import FieldDoesNotExist, FieldError
  21from plain.models.aggregates import Count
  22from plain.models.constants import LOOKUP_SEP
  23from plain.models.db import NotSupportedError, db_connection
  24from plain.models.expressions import (
  25    BaseExpression,
  26    Col,
  27    Exists,
  28    F,
  29    OuterRef,
  30    Ref,
  31    ResolvedOuterRef,
  32    Value,
  33)
  34from plain.models.fields import Field
  35from plain.models.fields.related_lookups import MultiColSource
  36from plain.models.lookups import Lookup
  37from plain.models.query_utils import (
  38    Q,
  39    check_rel_lookup_compatibility,
  40    refs_expression,
  41)
  42from plain.models.sql.constants import INNER, LOUTER, ORDER_DIR, SINGLE
  43from plain.models.sql.datastructures import BaseTable, Empty, Join, MultiJoin
  44from plain.models.sql.where import AND, OR, ExtraWhere, NothingNode, WhereNode
  45from plain.utils.regex_helper import _lazy_re_compile
  46from plain.utils.tree import Node
  47
# Public API of this module.
__all__ = ["Query", "RawQuery"]

# Quotation marks ('"`[]), whitespace characters, semicolons, or inline
# SQL comments are forbidden in column aliases.
FORBIDDEN_ALIAS_PATTERN = _lazy_re_compile(r"['`\"\]\[;\s]|--|/\*|\*/")

# Valid EXPLAIN option names: word characters and hyphens only. Inspired from
# https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
EXPLAIN_OPTIONS_PATTERN = _lazy_re_compile(r"[\w\-]+")
  57
  58
  59def get_field_names_from_opts(opts):
  60    if opts is None:
  61        return set()
  62    return set(
  63        chain.from_iterable(
  64            (f.name, f.attname) if f.concrete else (f.name,) for f in opts.get_fields()
  65        )
  66    )
  67
  68
  69def get_children_from_q(q):
  70    for child in q.children:
  71        if isinstance(child, Node):
  72            yield from get_children_from_q(child)
  73        else:
  74            yield child
  75
  76
# Bundle describing one resolved join path: the field the path ends on, its
# target fields, the model options reached, the chain of join aliases, the
# path entries traversed, and a callable applied when following the path.
# NOTE(review): field semantics inferred from names — the producer of this
# tuple is outside this chunk; confirm against the join-setup code.
JoinInfo = namedtuple(
    "JoinInfo",
    ("final_field", "targets", "opts", "joins", "path", "transform_function"),
)
  81
  82
  83class RawQuery:
  84    """A single raw SQL query."""
  85
  86    def __init__(self, sql, params=()):
  87        self.params = params
  88        self.sql = sql
  89        self.cursor = None
  90
  91        # Mirror some properties of a normal query so that
  92        # the compiler can be used to process results.
  93        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
  94        self.extra_select = {}
  95        self.annotation_select = {}
  96
  97    def chain(self):
  98        return self.clone()
  99
 100    def clone(self):
 101        return RawQuery(self.sql, params=self.params)
 102
 103    def get_columns(self):
 104        if self.cursor is None:
 105            self._execute_query()
 106        converter = db_connection.introspection.identifier_converter
 107        return [converter(column_meta[0]) for column_meta in self.cursor.description]
 108
 109    def __iter__(self):
 110        # Always execute a new query for a new iterator.
 111        # This could be optimized with a cache at the expense of RAM.
 112        self._execute_query()
 113        if not db_connection.features.can_use_chunked_reads:
 114            # If the database can't use chunked reads we need to make sure we
 115            # evaluate the entire query up front.
 116            result = list(self.cursor)
 117        else:
 118            result = self.cursor
 119        return iter(result)
 120
 121    def __repr__(self):
 122        return f"<{self.__class__.__name__}: {self}>"
 123
 124    @property
 125    def params_type(self):
 126        if self.params is None:
 127            return None
 128        return dict if isinstance(self.params, Mapping) else tuple
 129
 130    def __str__(self):
 131        if self.params_type is None:
 132            return self.sql
 133        return self.sql % self.params_type(self.params)
 134
 135    def _execute_query(self):
 136        # Adapt parameters to the database, as much as possible considering
 137        # that the target type isn't known. See #17755.
 138        params_type = self.params_type
 139        adapter = db_connection.ops.adapt_unknown_value
 140        if params_type is tuple:
 141            params = tuple(adapter(val) for val in self.params)
 142        elif params_type is dict:
 143            params = {key: adapter(val) for key, val in self.params.items()}
 144        elif params_type is None:
 145            params = None
 146        else:
 147            raise RuntimeError(f"Unexpected params type: {params_type}")
 148
 149        self.cursor = db_connection.cursor()
 150        self.cursor.execute(self.sql, params)
 151
 152
# Captured by Query.explain(): the requested output format and the
# backend-specific options passed through as keyword arguments.
ExplainInfo = namedtuple("ExplainInfo", ("format", "options"))
 154
 155
 156class Query(BaseExpression):
 157    """A single SQL query."""
 158
    alias_prefix = "T"
    empty_result_set_value = None
    subq_aliases = frozenset([alias_prefix])

    compiler = "SQLCompiler"

    base_table_class = BaseTable
    join_class = Join

    default_cols = True
    default_ordering = True
    standard_ordering = True

    filter_is_sticky = False
    subquery = False

    # SQL-related attributes.
    # Select and related select clauses are expressions to use in the SELECT
    # clause of the query. The select is used for cases where we want to set up
    # the select clause to contain other than default fields (values(),
    # subqueries...). Note that annotations go to annotations dictionary.
    select = ()
    # The group_by attribute can have one of the following forms:
    #  - None: no group by at all in the query
    #  - A tuple of expressions: group by (at least) those expressions.
    #    String refs are also allowed for now.
    #  - True: group by all select fields of the model
    # See compiler.get_group_by() for details.
    group_by = None
    order_by = ()
    low_mark = 0  # Used for offset/limit.
    high_mark = None  # Used for offset/limit.
    distinct = False
    distinct_fields = ()
    select_for_update = False
    select_for_update_nowait = False
    select_for_update_skip_locked = False
    select_for_update_of = ()
    select_for_no_key_update = False
    select_related = False
    has_select_fields = False
    # Arbitrary limit for select_related to prevent infinite recursion.
    max_depth = 5
    # Holds the selects defined by a call to values() or values_list()
    # excluding annotation_select and extra_select.
    values_select = ()

    # SQL annotation-related attributes.
    annotation_select_mask = None
    _annotation_select_cache = None

    # Set combination attributes.
    combinator = None
    combinator_all = False
    combined_queries = ()

    # These are for extensions. The contents are more or less appended verbatim
    # to the appropriate clause.
    extra_select_mask = None
    _extra_select_cache = None

    extra_tables = ()
    extra_order_by = ()

    # A tuple that is a set of model field names and either True, if these are
    # the fields to defer, or False if these are the only fields to load.
    deferred_loading = (frozenset(), True)

    explain_info = None
 228
    def __init__(self, model, alias_cols=True):
        """
        Initialize a query for *model*.

        ``alias_cols`` controls whether columns are qualified with a table
        alias while resolving references.
        """
        self.model = model
        self.alias_refcount = {}
        # alias_map is the most important data structure regarding joins.
        # It's used for recording which joins exist in the query and what
        # types they are. The key is the alias of the joined table (possibly
        # the table name) and the value is a Join-like object (see
        # sql.datastructures.Join for more information).
        self.alias_map = {}
        # Whether to provide alias to columns during reference resolving.
        self.alias_cols = alias_cols
        # Sometimes the query contains references to aliases in outer queries (as
        # a result of split_exclude). Correct alias quoting needs to know these
        # aliases too.
        # Map external tables to whether they are aliased.
        self.external_aliases = {}
        self.table_map = {}  # Maps table names to list of aliases.
        self.used_aliases = set()

        self.where = WhereNode()
        # Maps alias -> Annotation Expression.
        self.annotations = {}
        # These are for extensions. The contents are more or less appended
        # verbatim to the appropriate clause.
        self.extra = {}  # Maps col_alias -> (col_sql, params).

        self._filtered_relations = {}
 256
 257    @property
 258    def output_field(self):
 259        if len(self.select) == 1:
 260            select = self.select[0]
 261            return getattr(select, "target", None) or select.field
 262        elif len(self.annotation_select) == 1:
 263            return next(iter(self.annotation_select.values())).output_field
 264
 265    @cached_property
 266    def base_table(self):
 267        for alias in self.alias_map:
 268            return alias
 269
 270    def __str__(self):
 271        """
 272        Return the query as a string of SQL with the parameter values
 273        substituted in (use sql_with_params() to see the unsubstituted string).
 274
 275        Parameter values won't necessarily be quoted correctly, since that is
 276        done by the database interface at execution time.
 277        """
 278        sql, params = self.sql_with_params()
 279        return sql % params
 280
 281    def sql_with_params(self):
 282        """
 283        Return the query as an SQL string and the parameters that will be
 284        substituted into the query.
 285        """
 286        return self.get_compiler().as_sql()
 287
 288    def __deepcopy__(self, memo):
 289        """Limit the amount of work when a Query is deepcopied."""
 290        result = self.clone()
 291        memo[id(self)] = result
 292        return result
 293
 294    def get_compiler(self, *, elide_empty=True):
 295        return db_connection.ops.compiler(self.compiler)(
 296            self, db_connection, elide_empty
 297        )
 298
 299    def get_meta(self):
 300        """
 301        Return the Options instance (the model._meta) from which to start
 302        processing. Normally, this is self.model._meta, but it can be changed
 303        by subclasses.
 304        """
 305        if self.model:
 306            return self.model._meta
 307
 308    def clone(self):
 309        """
 310        Return a copy of the current Query. A lightweight alternative to
 311        deepcopy().
 312        """
 313        obj = Empty()
 314        obj.__class__ = self.__class__
 315        # Copy references to everything.
 316        obj.__dict__ = self.__dict__.copy()
 317        # Clone attributes that can't use shallow copy.
 318        obj.alias_refcount = self.alias_refcount.copy()
 319        obj.alias_map = self.alias_map.copy()
 320        obj.external_aliases = self.external_aliases.copy()
 321        obj.table_map = self.table_map.copy()
 322        obj.where = self.where.clone()
 323        obj.annotations = self.annotations.copy()
 324        if self.annotation_select_mask is not None:
 325            obj.annotation_select_mask = self.annotation_select_mask.copy()
 326        if self.combined_queries:
 327            obj.combined_queries = tuple(
 328                [query.clone() for query in self.combined_queries]
 329            )
 330        # _annotation_select_cache cannot be copied, as doing so breaks the
 331        # (necessary) state in which both annotations and
 332        # _annotation_select_cache point to the same underlying objects.
 333        # It will get re-populated in the cloned queryset the next time it's
 334        # used.
 335        obj._annotation_select_cache = None
 336        obj.extra = self.extra.copy()
 337        if self.extra_select_mask is not None:
 338            obj.extra_select_mask = self.extra_select_mask.copy()
 339        if self._extra_select_cache is not None:
 340            obj._extra_select_cache = self._extra_select_cache.copy()
 341        if self.select_related is not False:
 342            # Use deepcopy because select_related stores fields in nested
 343            # dicts.
 344            obj.select_related = copy.deepcopy(obj.select_related)
 345        if "subq_aliases" in self.__dict__:
 346            obj.subq_aliases = self.subq_aliases.copy()
 347        obj.used_aliases = self.used_aliases.copy()
 348        obj._filtered_relations = self._filtered_relations.copy()
 349        # Clear the cached_property, if it exists.
 350        obj.__dict__.pop("base_table", None)
 351        return obj
 352
 353    def chain(self, klass=None):
 354        """
 355        Return a copy of the current Query that's ready for another operation.
 356        The klass argument changes the type of the Query, e.g. UpdateQuery.
 357        """
 358        obj = self.clone()
 359        if klass and obj.__class__ != klass:
 360            obj.__class__ = klass
 361        if not obj.filter_is_sticky:
 362            obj.used_aliases = set()
 363        obj.filter_is_sticky = False
 364        if hasattr(obj, "_setup_query"):
 365            obj._setup_query()
 366        return obj
 367
 368    def relabeled_clone(self, change_map):
 369        clone = self.clone()
 370        clone.change_aliases(change_map)
 371        return clone
 372
 373    def _get_col(self, target, field, alias):
 374        if not self.alias_cols:
 375            alias = None
 376        return target.get_col(alias, field)
 377
    def get_aggregation(self, aggregate_exprs):
        """
        Return the dictionary with the values of the existing aggregations.

        ``aggregate_exprs`` maps result aliases to aggregate expressions;
        each is resolved against this query, executed, and the resulting
        values are returned keyed by alias. Raise TypeError when a given
        expression is not actually an aggregate.
        """
        if not aggregate_exprs:
            return {}
        aggregates = {}
        for alias, aggregate_expr in aggregate_exprs.items():
            self.check_alias(alias)
            aggregate = aggregate_expr.resolve_expression(
                self, allow_joins=True, reuse=None, summarize=True
            )
            if not aggregate.contains_aggregate:
                raise TypeError(f"{alias} is not an aggregate expression")
            aggregates[alias] = aggregate
        # Existing usage of aggregation can be determined by the presence of
        # selected aggregates but also by filters against aliased aggregates.
        _, having, qualify = self.where.split_having_qualify()
        has_existing_aggregation = (
            any(
                getattr(annotation, "contains_aggregate", True)
                for annotation in self.annotations.values()
            )
            or having
        )
        # Decide if we need to use a subquery.
        #
        # Existing aggregations would cause incorrect results as
        # get_aggregation() must produce just one result and thus must not use
        # GROUP BY.
        #
        # If the query has limit or distinct, or uses set operations, then
        # those operations must be done in a subquery so that the query
        # aggregates on the limit and/or distinct results instead of applying
        # the distinct and limit after the aggregation.
        if (
            isinstance(self.group_by, tuple)
            or self.is_sliced
            or has_existing_aggregation
            or qualify
            or self.distinct
            or self.combinator
        ):
            # Imported here to avoid a circular import at module load time.
            from plain.models.sql.subqueries import AggregateQuery

            inner_query = self.clone()
            inner_query.subquery = True
            outer_query = AggregateQuery(self.model, inner_query)
            inner_query.select_for_update = False
            inner_query.select_related = False
            inner_query.set_annotation_mask(self.annotation_select)
            # Queries with distinct_fields need ordering and when a limit is
            # applied we must take the slice from the ordered query. Otherwise
            # no need for ordering.
            inner_query.clear_ordering(force=False)
            if not inner_query.distinct:
                # If the inner query uses default select and it has some
                # aggregate annotations, then we must make sure the inner
                # query is grouped by the main model's primary key. However,
                # clearing the select clause can alter results if distinct is
                # used.
                if inner_query.default_cols and has_existing_aggregation:
                    inner_query.group_by = (
                        self.model._meta.get_field("id").get_col(
                            inner_query.get_initial_alias()
                        ),
                    )
                inner_query.default_cols = False
                if not qualify:
                    # Mask existing annotations that are not referenced by
                    # aggregates to be pushed to the outer query unless
                    # filtering against window functions is involved as it
                    # requires complex realising.
                    annotation_mask = set()
                    for aggregate in aggregates.values():
                        annotation_mask |= aggregate.get_refs()
                    inner_query.set_annotation_mask(annotation_mask)

            # Add aggregates to the outer AggregateQuery. This requires making
            # sure all columns referenced by the aggregates are selected in the
            # inner query. It is achieved by retrieving all column references
            # by the aggregates, explicitly selecting them in the inner query,
            # and making sure the aggregates are repointed to them.
            col_refs = {}
            for alias, aggregate in aggregates.items():
                replacements = {}
                for col in self._gen_cols([aggregate], resolve_refs=False):
                    if not (col_ref := col_refs.get(col)):
                        index = len(col_refs) + 1
                        col_alias = f"__col{index}"
                        col_ref = Ref(col_alias, col)
                        col_refs[col] = col_ref
                        inner_query.annotations[col_alias] = col
                        inner_query.append_annotation_mask([col_alias])
                    replacements[col] = col_ref
                outer_query.annotations[alias] = aggregate.replace_expressions(
                    replacements
                )
            if (
                inner_query.select == ()
                and not inner_query.default_cols
                and not inner_query.annotation_select_mask
            ):
                # In case of Model.objects[0:3].count(), there would be no
                # field selected in the inner query, yet we must use a subquery.
                # So, make sure at least one field is selected.
                inner_query.select = (
                    self.model._meta.get_field("id").get_col(
                        inner_query.get_initial_alias()
                    ),
                )
        else:
            # No subquery needed: aggregate directly on this query.
            outer_query = self
            self.select = ()
            self.default_cols = False
            self.extra = {}
            if self.annotations:
                # Inline reference to existing annotations and mask them as
                # they are unnecessary given only the summarized aggregations
                # are requested.
                replacements = {
                    Ref(alias, annotation): annotation
                    for alias, annotation in self.annotations.items()
                }
                self.annotations = {
                    alias: aggregate.replace_expressions(replacements)
                    for alias, aggregate in aggregates.items()
                }
            else:
                self.annotations = aggregates
            self.set_annotation_mask(aggregates)

        # An expression whose empty_result_set_value is NotImplemented opts
        # out of eliding the query for empty result sets.
        empty_set_result = [
            expression.empty_result_set_value
            for expression in outer_query.annotation_select.values()
        ]
        elide_empty = not any(result is NotImplemented for result in empty_set_result)
        outer_query.clear_ordering(force=True)
        outer_query.clear_limits()
        outer_query.select_for_update = False
        outer_query.select_related = False
        compiler = outer_query.get_compiler(elide_empty=elide_empty)
        result = compiler.execute_sql(SINGLE)
        if result is None:
            result = empty_set_result
        else:
            converters = compiler.get_converters(outer_query.annotation_select.values())
            result = next(compiler.apply_converters((result,), converters))

        return dict(zip(outer_query.annotation_select, result))
 528
 529    def get_count(self):
 530        """
 531        Perform a COUNT() query using the current filter constraints.
 532        """
 533        obj = self.clone()
 534        return obj.get_aggregation({"__count": Count("*")})["__count"]
 535
    def has_filters(self):
        # Return the where clause itself; callers rely on its truthiness to
        # determine whether any filters are applied.
        return self.where
 538
 539    def exists(self, limit=True):
 540        q = self.clone()
 541        if not (q.distinct and q.is_sliced):
 542            if q.group_by is True:
 543                q.add_fields(
 544                    (f.attname for f in self.model._meta.concrete_fields), False
 545                )
 546                # Disable GROUP BY aliases to avoid orphaning references to the
 547                # SELECT clause which is about to be cleared.
 548                q.set_group_by(allow_aliases=False)
 549            q.clear_select_clause()
 550        if q.combined_queries and q.combinator == "union":
 551            q.combined_queries = tuple(
 552                combined_query.exists(limit=False)
 553                for combined_query in q.combined_queries
 554            )
 555        q.clear_ordering(force=True)
 556        if limit:
 557            q.set_limits(high=1)
 558        q.add_annotation(Value(1), "a")
 559        return q
 560
 561    def has_results(self):
 562        q = self.exists()
 563        compiler = q.get_compiler()
 564        return compiler.has_results()
 565
 566    def explain(self, format=None, **options):
 567        q = self.clone()
 568        for option_name in options:
 569            if (
 570                not EXPLAIN_OPTIONS_PATTERN.fullmatch(option_name)
 571                or "--" in option_name
 572            ):
 573                raise ValueError(f"Invalid option name: {option_name!r}.")
 574        q.explain_info = ExplainInfo(format, options)
 575        compiler = q.get_compiler()
 576        return "\n".join(compiler.explain_query())
 577
    def combine(self, rhs, connector):
        """
        Merge the 'rhs' query into the current one (with any 'rhs' effects
        being applied *after* (that is, "to the right of") anything in the
        current query. 'rhs' is not modified during a call to this function.

        The 'connector' parameter describes how to connect filters from the
        'rhs' query.

        Raise TypeError when the two queries use different base models,
        either is sliced, or their distinct/distinct_fields settings differ.
        """
        if self.model != rhs.model:
            raise TypeError("Cannot combine queries on two different base models.")
        if self.is_sliced:
            raise TypeError("Cannot combine queries once a slice has been taken.")
        if self.distinct != rhs.distinct:
            raise TypeError("Cannot combine a unique query with a non-unique query.")
        if self.distinct_fields != rhs.distinct_fields:
            raise TypeError("Cannot combine queries with different distinct fields.")

        # If lhs and rhs shares the same alias prefix, it is possible to have
        # conflicting alias changes like T4 -> T5, T5 -> T6, which might end up
        # as T4 -> T6 while combining two querysets. To prevent this, change an
        # alias prefix of the rhs and update current aliases accordingly,
        # except if the alias is the base table since it must be present in the
        # query on both sides.
        initial_alias = self.get_initial_alias()
        rhs.bump_prefix(self, exclude={initial_alias})

        # Work out how to relabel the rhs aliases, if necessary.
        change_map = {}
        conjunction = connector == AND

        # Determine which existing joins can be reused. When combining the
        # query with AND we must recreate all joins for m2m filters. When
        # combining with OR we can reuse joins. The reason is that in AND
        # case a single row can't fulfill a condition like:
        #     revrel__col=1 & revrel__col=2
        # But, there might be two different related rows matching this
        # condition. In OR case a single True is enough, so single row is
        # enough, too.
        #
        # Note that we will be creating duplicate joins for non-m2m joins in
        # the AND case. The results will be correct but this creates too many
        # joins. This is something that could be fixed later on.
        reuse = set() if conjunction else set(self.alias_map)
        joinpromoter = JoinPromoter(connector, 2, False)
        joinpromoter.add_votes(
            j for j in self.alias_map if self.alias_map[j].join_type == INNER
        )
        rhs_votes = set()
        # Now, add the joins from rhs query into the new query (skipping base
        # table).
        rhs_tables = list(rhs.alias_map)[1:]
        for alias in rhs_tables:
            join = rhs.alias_map[alias]
            # If the left side of the join was already relabeled, use the
            # updated alias.
            join = join.relabeled_clone(change_map)
            new_alias = self.join(join, reuse=reuse)
            if join.join_type == INNER:
                rhs_votes.add(new_alias)
            # We can't reuse the same join again in the query. If we have two
            # distinct joins for the same connection in rhs query, then the
            # combined query must have two joins, too.
            reuse.discard(new_alias)
            if alias != new_alias:
                change_map[alias] = new_alias
            if not rhs.alias_refcount[alias]:
                # The alias was unused in the rhs query. Unref it so that it
                # will be unused in the new query, too. We have to add and
                # unref the alias so that join promotion has information of
                # the join type for the unused alias.
                self.unref_alias(new_alias)
        joinpromoter.add_votes(rhs_votes)
        joinpromoter.update_join_types(self)

        # Combine subqueries aliases to ensure aliases relabelling properly
        # handle subqueries when combining where and select clauses.
        self.subq_aliases |= rhs.subq_aliases

        # Now relabel a copy of the rhs where-clause and add it to the current
        # one.
        w = rhs.where.clone()
        w.relabel_aliases(change_map)
        self.where.add(w, connector)

        # Selection columns and extra extensions are those provided by 'rhs'.
        if rhs.select:
            self.set_select([col.relabeled_clone(change_map) for col in rhs.select])
        else:
            self.select = ()

        if connector == OR:
            # It would be nice to be able to handle this, but the queries don't
            # really make sense (or return consistent value sets). Not worth
            # the extra complexity when you can write a real query instead.
            if self.extra and rhs.extra:
                raise ValueError(
                    "When merging querysets using 'or', you cannot have "
                    "extra(select=...) on both sides."
                )
        self.extra.update(rhs.extra)
        extra_select_mask = set()
        if self.extra_select_mask is not None:
            extra_select_mask.update(self.extra_select_mask)
        if rhs.extra_select_mask is not None:
            extra_select_mask.update(rhs.extra_select_mask)
        if extra_select_mask:
            self.set_extra_mask(extra_select_mask)
        self.extra_tables += rhs.extra_tables

        # Ordering uses the 'rhs' ordering, unless it has none, in which case
        # the current ordering is used.
        self.order_by = rhs.order_by or self.order_by
        self.extra_order_by = rhs.extra_order_by or self.extra_order_by
 692
    def _get_defer_select_mask(self, opts, mask, select_mask=None):
        """
        Build a select mask for defer(): every concrete field *not* named in
        ``mask`` must be loaded, while deferred relational entries recurse
        into the related model. Return ``select_mask`` mapping fields to
        nested masks. Note that ``mask`` is consumed (entries are popped)
        in the process.
        """
        if select_mask is None:
            select_mask = {}
        # The primary key is always loaded.
        select_mask[opts.get_field("id")] = {}
        # All concrete fields that are not part of the defer mask must be
        # loaded. If a relational field is encountered it gets added to the
        # mask for it be considered if `select_related` and the cycle continues
        # by recursively calling this function.
        for field in opts.concrete_fields:
            field_mask = mask.pop(field.name, None)
            if field_mask is None:
                select_mask.setdefault(field, {})
            elif field_mask:
                if not field.is_relation:
                    raise FieldError(next(iter(field_mask)))
                field_select_mask = select_mask.setdefault(field, {})
                related_model = field.remote_field.model._meta.concrete_model
                self._get_defer_select_mask(
                    related_model._meta, field_mask, field_select_mask
                )
        # Remaining defer entries must be references to reverse relationships.
        # The following code is expected to raise FieldError if it encounters
        # a malformed defer entry.
        for field_name, field_mask in mask.items():
            if filtered_relation := self._filtered_relations.get(field_name):
                relation = opts.get_field(filtered_relation.relation_name)
                field_select_mask = select_mask.setdefault((field_name, relation), {})
                field = relation.field
            else:
                field = opts.get_field(field_name).field
                field_select_mask = select_mask.setdefault(field, {})
            related_model = field.model._meta.concrete_model
            self._get_defer_select_mask(
                related_model._meta, field_mask, field_select_mask
            )
        return select_mask
 729
 730    def _get_only_select_mask(self, opts, mask, select_mask=None):
 731        if select_mask is None:
 732            select_mask = {}
 733        select_mask[opts.get_field("id")] = {}
 734        # Only include fields mentioned in the mask.
 735        for field_name, field_mask in mask.items():
 736            field = opts.get_field(field_name)
 737            field_select_mask = select_mask.setdefault(field, {})
 738            if field_mask:
 739                if not field.is_relation:
 740                    raise FieldError(next(iter(field_mask)))
 741                related_model = field.remote_field.model._meta.concrete_model
 742                self._get_only_select_mask(
 743                    related_model._meta, field_mask, field_select_mask
 744                )
 745        return select_mask
 746
 747    def get_select_mask(self):
 748        """
 749        Convert the self.deferred_loading data structure to an alternate data
 750        structure, describing the field that *will* be loaded. This is used to
 751        compute the columns to select from the database and also by the
 752        QuerySet class to work out which fields are being initialized on each
 753        model. Models that have all their fields included aren't mentioned in
 754        the result, only those that have field restrictions in place.
 755        """
 756        field_names, defer = self.deferred_loading
 757        if not field_names:
 758            return {}
 759        mask = {}
 760        for field_name in field_names:
 761            part_mask = mask
 762            for part in field_name.split(LOOKUP_SEP):
 763                part_mask = part_mask.setdefault(part, {})
 764        opts = self.get_meta()
 765        if defer:
 766            return self._get_defer_select_mask(opts, mask)
 767        return self._get_only_select_mask(opts, mask)
 768
 769    def table_alias(self, table_name, create=False, filtered_relation=None):
 770        """
 771        Return a table alias for the given table_name and whether this is a
 772        new alias or not.
 773
 774        If 'create' is true, a new alias is always created. Otherwise, the
 775        most recently created alias for the table (if one exists) is reused.
 776        """
 777        alias_list = self.table_map.get(table_name)
 778        if not create and alias_list:
 779            alias = alias_list[0]
 780            self.alias_refcount[alias] += 1
 781            return alias, False
 782
 783        # Create a new alias for this table.
 784        if alias_list:
 785            alias = "%s%d" % (self.alias_prefix, len(self.alias_map) + 1)  # noqa: UP031
 786            alias_list.append(alias)
 787        else:
 788            # The first occurrence of a table uses the table name directly.
 789            alias = (
 790                filtered_relation.alias if filtered_relation is not None else table_name
 791            )
 792            self.table_map[table_name] = [alias]
 793        self.alias_refcount[alias] = 1
 794        return alias, True
 795
 796    def ref_alias(self, alias):
 797        """Increases the reference count for this alias."""
 798        self.alias_refcount[alias] += 1
 799
 800    def unref_alias(self, alias, amount=1):
 801        """Decreases the reference count for this alias."""
 802        self.alias_refcount[alias] -= amount
 803
 804    def promote_joins(self, aliases):
 805        """
 806        Promote recursively the join type of given aliases and its children to
 807        an outer join. If 'unconditional' is False, only promote the join if
 808        it is nullable or the parent join is an outer join.
 809
 810        The children promotion is done to avoid join chains that contain a LOUTER
 811        b INNER c. So, if we have currently a INNER b INNER c and a->b is promoted,
 812        then we must also promote b->c automatically, or otherwise the promotion
 813        of a->b doesn't actually change anything in the query results.
 814        """
 815        aliases = list(aliases)
 816        while aliases:
 817            alias = aliases.pop(0)
 818            if self.alias_map[alias].join_type is None:
 819                # This is the base table (first FROM entry) - this table
 820                # isn't really joined at all in the query, so we should not
 821                # alter its join type.
 822                continue
 823            # Only the first alias (skipped above) should have None join_type
 824            assert self.alias_map[alias].join_type is not None
 825            parent_alias = self.alias_map[alias].parent_alias
 826            parent_louter = (
 827                parent_alias and self.alias_map[parent_alias].join_type == LOUTER
 828            )
 829            already_louter = self.alias_map[alias].join_type == LOUTER
 830            if (self.alias_map[alias].nullable or parent_louter) and not already_louter:
 831                self.alias_map[alias] = self.alias_map[alias].promote()
 832                # Join type of 'alias' changed, so re-examine all aliases that
 833                # refer to this one.
 834                aliases.extend(
 835                    join
 836                    for join in self.alias_map
 837                    if self.alias_map[join].parent_alias == alias
 838                    and join not in aliases
 839                )
 840
    def demote_joins(self, aliases):
        """
        Change join type from LOUTER to INNER for all joins in aliases.

        Similarly to promote_joins(), this method must ensure no join chains
        containing first an outer, then an inner join are generated. If we
        are demoting b->c join in chain a LOUTER b LOUTER c then we must
        demote a->b automatically, or otherwise the demotion of b->c doesn't
        actually change anything in the query results.
        """
        aliases = list(aliases)
        while aliases:
            alias = aliases.pop(0)
            if self.alias_map[alias].join_type == LOUTER:
                self.alias_map[alias] = self.alias_map[alias].demote()
                parent_alias = self.alias_map[alias].parent_alias
                if self.alias_map[parent_alias].join_type == INNER:
                    # Queue the parent join for another pass as well.
                    aliases.append(parent_alias)
 859
 860    def reset_refcounts(self, to_counts):
 861        """
 862        Reset reference counts for aliases so that they match the value passed
 863        in `to_counts`.
 864        """
 865        for alias, cur_refcount in self.alias_refcount.copy().items():
 866            unref_amount = cur_refcount - to_counts.get(alias, 0)
 867            self.unref_alias(alias, unref_amount)
 868
    def change_aliases(self, change_map):
        """
        Change the aliases in change_map (which maps old-alias -> new-alias),
        relabelling any references to them in select columns and the where
        clause.
        """
        # If keys and values of change_map were to intersect, an alias might be
        # updated twice (e.g. T4 -> T5, T5 -> T6, so also T4 -> T6) depending
        # on their order in change_map.
        assert set(change_map).isdisjoint(change_map.values())

        # 1. Update references in "select" (normal columns plus aliases),
        # "group by" and "where".
        self.where.relabel_aliases(change_map)
        if isinstance(self.group_by, tuple):
            self.group_by = tuple(
                [col.relabeled_clone(change_map) for col in self.group_by]
            )
        self.select = tuple([col.relabeled_clone(change_map) for col in self.select])
        # A falsy annotations value is preserved as-is by the `and`.
        self.annotations = self.annotations and {
            key: col.relabeled_clone(change_map)
            for key, col in self.annotations.items()
        }

        # 2. Rename the alias in the internal table/alias datastructures.
        for old_alias, new_alias in change_map.items():
            if old_alias not in self.alias_map:
                continue
            alias_data = self.alias_map[old_alias].relabeled_clone(change_map)
            self.alias_map[new_alias] = alias_data
            self.alias_refcount[new_alias] = self.alias_refcount[old_alias]
            del self.alias_refcount[old_alias]
            del self.alias_map[old_alias]

            # Keep table_map's alias list pointing at the new name.
            table_aliases = self.table_map[alias_data.table_name]
            for pos, alias in enumerate(table_aliases):
                if alias == old_alias:
                    table_aliases[pos] = new_alias
                    break
        # 3. External aliases (references from an outer query) follow the
        # same renaming.
        self.external_aliases = {
            # Table is aliased or it's being changed and thus is aliased.
            change_map.get(alias, alias): (aliased or alias in change_map)
            for alias, aliased in self.external_aliases.items()
        }
 913
    def bump_prefix(self, other_query, exclude=None):
        """
        Change the alias prefix to the next letter in the alphabet in a way
        that the other query's aliases and this query's aliases will not
        conflict. Even tables that previously had no alias will get an alias
        after this call. To prevent changing aliases use the exclude parameter.
        """

        def prefix_gen():
            """
            Generate a sequence of characters in alphabetical order:
                -> 'A', 'B', 'C', ...

            When the alphabet is finished, the sequence will continue with the
            Cartesian product:
                -> 'AA', 'AB', 'AC', ...
            """
            alphabet = ascii_uppercase
            # Start one letter past the current prefix.
            prefix = chr(ord(self.alias_prefix) + 1)
            yield prefix
            for n in count(1):
                seq = alphabet[alphabet.index(prefix) :] if prefix else alphabet
                for s in product(seq, repeat=n):
                    yield "".join(s)
                prefix = None

        if self.alias_prefix != other_query.alias_prefix:
            # No clashes between self and outer query should be possible.
            return

        # Explicitly avoid infinite loop. The constant divider is based on how
        # much depth recursive subquery references add to the stack. This value
        # might need to be adjusted when adding or removing function calls from
        # the code path in charge of performing these operations.
        local_recursion_limit = sys.getrecursionlimit() // 16
        for pos, prefix in enumerate(prefix_gen()):
            if prefix not in self.subq_aliases:
                self.alias_prefix = prefix
                break
            if pos > local_recursion_limit:
                raise RecursionError(
                    "Maximum recursion depth exceeded: too many subqueries."
                )
        # Both queries record every prefix in use so future bumps also avoid
        # clashes.
        self.subq_aliases = self.subq_aliases.union([self.alias_prefix])
        other_query.subq_aliases = other_query.subq_aliases.union(self.subq_aliases)
        if exclude is None:
            exclude = {}
        # Rename every existing alias to "<prefix><index>", except excluded ones.
        self.change_aliases(
            {
                alias: "%s%d" % (self.alias_prefix, pos)  # noqa: UP031
                for pos, alias in enumerate(self.alias_map)
                if alias not in exclude
            }
        )
 968
 969    def get_initial_alias(self):
 970        """
 971        Return the first alias for this query, after increasing its reference
 972        count.
 973        """
 974        if self.alias_map:
 975            alias = self.base_table
 976            self.ref_alias(alias)
 977        elif self.model:
 978            alias = self.join(self.base_table_class(self.get_meta().db_table, None))
 979        else:
 980            alias = None
 981        return alias
 982
 983    def count_active_tables(self):
 984        """
 985        Return the number of tables in this query with a non-zero reference
 986        count. After execution, the reference counts are zeroed, so tables
 987        added in compiler will not be seen by this method.
 988        """
 989        return len([1 for count in self.alias_refcount.values() if count])
 990
    def join(self, join, reuse=None, reuse_with_filtered_relation=False):
        """
        Return an alias for the 'join', either reusing an existing alias for
        that join or creating a new one. 'join' is either a base_table_class or
        join_class.

        The 'reuse' parameter can be either None which means all joins are
        reusable, or it can be a set containing the aliases that can be reused.

        The 'reuse_with_filtered_relation' parameter is used when computing
        FilteredRelation instances.

        A join is always created as LOUTER if the lhs alias is LOUTER to make
        sure chains like t1 LOUTER t2 INNER t3 aren't generated. All new
        joins are created as LOUTER if the join is nullable.
        """
        # Collect candidate aliases whose join definition matches the
        # requested one (restricted to `reuse` when given).
        if reuse_with_filtered_relation and reuse:
            reuse_aliases = [
                a for a, j in self.alias_map.items() if a in reuse and j.equals(join)
            ]
        else:
            reuse_aliases = [
                a
                for a, j in self.alias_map.items()
                if (reuse is None or a in reuse) and j == join
            ]
        if reuse_aliases:
            if join.table_alias in reuse_aliases:
                reuse_alias = join.table_alias
            else:
                # Reuse the most recent alias of the joined table
                # (a many-to-many relation may be joined multiple times).
                reuse_alias = reuse_aliases[-1]
            self.ref_alias(reuse_alias)
            return reuse_alias

        # No reuse is possible, so we need a new alias.
        alias, _ = self.table_alias(
            join.table_name, create=True, filtered_relation=join.filtered_relation
        )
        if join.join_type:
            # Propagate LOUTER from the parent join or from nullability so
            # chains like t1 LOUTER t2 INNER t3 are never produced.
            if self.alias_map[join.parent_alias].join_type == LOUTER or join.nullable:
                join_type = LOUTER
            else:
                join_type = INNER
            join.join_type = join_type
        join.table_alias = alias
        self.alias_map[alias] = join
        return alias
1040
1041    def check_alias(self, alias):
1042        if FORBIDDEN_ALIAS_PATTERN.search(alias):
1043            raise ValueError(
1044                "Column aliases cannot contain whitespace characters, quotation marks, "
1045                "semicolons, or SQL comments."
1046            )
1047
1048    def add_annotation(self, annotation, alias, select=True):
1049        """Add a single annotation expression to the Query."""
1050        self.check_alias(alias)
1051        annotation = annotation.resolve_expression(self, allow_joins=True, reuse=None)
1052        if select:
1053            self.append_annotation_mask([alias])
1054        else:
1055            self.set_annotation_mask(set(self.annotation_select).difference({alias}))
1056        self.annotations[alias] = annotation
1057
    def resolve_expression(self, query, *args, **kwargs):
        """
        Resolve this query for use as a subquery expression inside the outer
        ``query``: return a clone with non-clashing aliases, a resolved where
        clause and annotations, and the outer query's aliases recorded in
        external_aliases.
        """
        clone = self.clone()
        # Subqueries need to use a different set of aliases than the outer query.
        clone.bump_prefix(query)
        clone.subquery = True
        clone.where.resolve_expression(query, *args, **kwargs)
        # Resolve combined queries.
        if clone.combinator:
            clone.combined_queries = tuple(
                [
                    combined_query.resolve_expression(query, *args, **kwargs)
                    for combined_query in clone.combined_queries
                ]
            )
        for key, value in clone.annotations.items():
            resolved = value.resolve_expression(query, *args, **kwargs)
            if hasattr(resolved, "external_aliases"):
                # Nested subqueries inherit this clone's external aliases.
                resolved.external_aliases.update(clone.external_aliases)
            clone.annotations[key] = resolved
        # Outer query's aliases are considered external.
        for alias, table in query.alias_map.items():
            # An alias only counts as external when it differs from the plain
            # table name (i.e. the table is actually aliased).
            clone.external_aliases[alias] = (
                isinstance(table, Join)
                and table.join_field.related_model._meta.db_table != alias
            ) or (
                isinstance(table, BaseTable) and table.table_name != table.table_alias
            )
        return clone
1086
1087    def get_external_cols(self):
1088        exprs = chain(self.annotations.values(), self.where.children)
1089        return [
1090            col
1091            for col in self._gen_cols(exprs, include_external=True)
1092            if col.alias in self.external_aliases
1093        ]
1094
1095    def get_group_by_cols(self, wrapper=None):
1096        # If wrapper is referenced by an alias for an explicit GROUP BY through
1097        # values() a reference to this expression and not the self must be
1098        # returned to ensure external column references are not grouped against
1099        # as well.
1100        external_cols = self.get_external_cols()
1101        if any(col.possibly_multivalued for col in external_cols):
1102            return [wrapper or self]
1103        return external_cols
1104
    def as_sql(self, compiler, connection):
        """
        Compile this query when it is used as an expression (typically a
        subquery): return a (sql, params) pair, wrapping the SQL in
        parentheses when self.subquery is set.

        NOTE: `compiler` and `connection` are part of the expression API but
        are not used here; compilation goes through self.get_compiler().
        """
        # Some backends (e.g. Oracle) raise an error when a subquery contains
        # unnecessary ORDER BY clause.
        if (
            self.subquery
            and not db_connection.features.ignores_unnecessary_order_by_in_subqueries
        ):
            self.clear_ordering(force=False)
            for query in self.combined_queries:
                query.clear_ordering(force=False)
        sql, params = self.get_compiler().as_sql()
        if self.subquery:
            sql = f"({sql})"
        return sql, params
1119
1120    def resolve_lookup_value(self, value, can_reuse, allow_joins):
1121        if hasattr(value, "resolve_expression"):
1122            value = value.resolve_expression(
1123                self,
1124                reuse=can_reuse,
1125                allow_joins=allow_joins,
1126            )
1127        elif isinstance(value, list | tuple):
1128            # The items of the iterable may be expressions and therefore need
1129            # to be resolved independently.
1130            values = (
1131                self.resolve_lookup_value(sub_value, can_reuse, allow_joins)
1132                for sub_value in value
1133            )
1134            type_ = type(value)
1135            if hasattr(type_, "_make"):  # namedtuple
1136                return type_(*values)
1137            return type_(values)
1138        return value
1139
1140    def solve_lookup_type(self, lookup, summarize=False):
1141        """
1142        Solve the lookup type from the lookup (e.g.: 'foobar__id__icontains').
1143        """
1144        lookup_splitted = lookup.split(LOOKUP_SEP)
1145        if self.annotations:
1146            annotation, expression_lookups = refs_expression(
1147                lookup_splitted, self.annotations
1148            )
1149            if annotation:
1150                expression = self.annotations[annotation]
1151                if summarize:
1152                    expression = Ref(annotation, expression)
1153                return expression_lookups, (), expression
1154        _, field, _, lookup_parts = self.names_to_path(lookup_splitted, self.get_meta())
1155        field_parts = lookup_splitted[0 : len(lookup_splitted) - len(lookup_parts)]
1156        if len(lookup_parts) > 1 and not field_parts:
1157            raise FieldError(
1158                f'Invalid lookup "{lookup}" for model {self.get_meta().model.__name__}".'
1159            )
1160        return lookup_parts, field_parts, False
1161
1162    def check_query_object_type(self, value, opts, field):
1163        """
1164        Check whether the object passed while querying is of the correct type.
1165        If not, raise a ValueError specifying the wrong object.
1166        """
1167        if hasattr(value, "_meta"):
1168            if not check_rel_lookup_compatibility(value._meta.model, opts, field):
1169                raise ValueError(
1170                    f'Cannot query "{value}": Must be "{opts.object_name}" instance.'
1171                )
1172
1173    def check_related_objects(self, field, value, opts):
1174        """Check the type of object passed to query relations."""
1175        if field.is_relation:
1176            # Check that the field and the queryset use the same model in a
1177            # query like .filter(author=Author.query.all()). For example, the
1178            # opts would be Author's (from the author field) and value.model
1179            # would be Author.query.all() queryset's .model (Author also).
1180            # The field is the related field on the lhs side.
1181            if (
1182                isinstance(value, Query)
1183                and not value.has_select_fields
1184                and not check_rel_lookup_compatibility(value.model, opts, field)
1185            ):
1186                raise ValueError(
1187                    f'Cannot use QuerySet for "{value.model._meta.object_name}": Use a QuerySet for "{opts.object_name}".'
1188                )
1189            elif hasattr(value, "_meta"):
1190                self.check_query_object_type(value, opts, field)
1191            elif hasattr(value, "__iter__"):
1192                for v in value:
1193                    self.check_query_object_type(v, opts, field)
1194
1195    def check_filterable(self, expression):
1196        """Raise an error if expression cannot be used in a WHERE clause."""
1197        if hasattr(expression, "resolve_expression") and not getattr(
1198            expression, "filterable", True
1199        ):
1200            raise NotSupportedError(
1201                expression.__class__.__name__ + " is disallowed in the filter clause."
1202            )
1203        if hasattr(expression, "get_source_expressions"):
1204            for expr in expression.get_source_expressions():
1205                self.check_filterable(expr)
1206
1207    def build_lookup(self, lookups, lhs, rhs):
1208        """
1209        Try to extract transforms and lookup from given lhs.
1210
1211        The lhs value is something that works like SQLExpression.
1212        The rhs value is what the lookup is going to compare against.
1213        The lookups is a list of names to extract using get_lookup()
1214        and get_transform().
1215        """
1216        # __exact is the default lookup if one isn't given.
1217        *transforms, lookup_name = lookups or ["exact"]
1218        for name in transforms:
1219            lhs = self.try_transform(lhs, name)
1220        # First try get_lookup() so that the lookup takes precedence if the lhs
1221        # supports both transform and lookup for the name.
1222        lookup_class = lhs.get_lookup(lookup_name)
1223        if not lookup_class:
1224            # A lookup wasn't found. Try to interpret the name as a transform
1225            # and do an Exact lookup against it.
1226            lhs = self.try_transform(lhs, lookup_name)
1227            lookup_name = "exact"
1228            lookup_class = lhs.get_lookup(lookup_name)
1229            if not lookup_class:
1230                return
1231
1232        lookup = lookup_class(lhs, rhs)
1233        # Interpret '__exact=None' as the sql 'is NULL'; otherwise, reject all
1234        # uses of None as a query value unless the lookup supports it.
1235        if lookup.rhs is None and not lookup.can_use_none_as_rhs:
1236            if lookup_name not in ("exact", "iexact"):
1237                raise ValueError("Cannot use None as a query value")
1238            return lhs.get_lookup("isnull")(lhs, True)
1239
1240        return lookup
1241
1242    def try_transform(self, lhs, name):
1243        """
1244        Helper method for build_lookup(). Try to fetch and initialize
1245        a transform for name parameter from lhs.
1246        """
1247        transform_class = lhs.get_transform(name)
1248        if transform_class:
1249            return transform_class(lhs)
1250        else:
1251            output_field = lhs.output_field.__class__
1252            suggested_lookups = difflib.get_close_matches(
1253                name, output_field.get_lookups()
1254            )
1255            if suggested_lookups:
1256                suggestion = ", perhaps you meant {}?".format(
1257                    " or ".join(suggested_lookups)
1258                )
1259            else:
1260                suggestion = "."
1261            raise FieldError(
1262                f"Unsupported lookup '{name}' for {output_field.__name__} or join on the field not "
1263                f"permitted{suggestion}"
1264            )
1265
    def build_filter(
        self,
        filter_expr,
        branch_negated=False,
        current_negated=False,
        can_reuse=None,
        allow_joins=True,
        split_subq=True,
        reuse_with_filtered_relation=False,
        check_filterable=True,
        summarize=False,
    ):
        """
        Build a WhereNode for a single filter clause but don't add it
        to this Query. Query.add_q() will then add this filter to the where
        Node.

        The 'branch_negated' tells us if the current branch contains any
        negations. This will be used to determine if subqueries are needed.

        The 'current_negated' is used to determine if the current filter is
        negated or not and this will be used to determine if IS NULL filtering
        is needed.

        The difference between current_negated and branch_negated is that
        branch_negated is set on first negation, but current_negated is
        flipped for each negation.

        Note that add_filter will not do any negating itself, that is done
        upper in the code by add_q().

        The 'can_reuse' is a set of reusable joins for multijoins.

        If 'reuse_with_filtered_relation' is True, then only joins in can_reuse
        will be reused.

        The method will create a filter clause that can be added to the current
        query. However, if the filter isn't added to the query then the caller
        is responsible for unreffing the joins used.

        Return a (WhereNode, joins) pair, where `joins` is the set of joins
        used (empty when an outer join is required).
        """
        if isinstance(filter_expr, dict):
            raise FieldError("Cannot parse keyword query as dict")
        # Q objects carry their own tree of filters: recurse via _add_q().
        if isinstance(filter_expr, Q):
            return self._add_q(
                filter_expr,
                branch_negated=branch_negated,
                current_negated=current_negated,
                used_aliases=can_reuse,
                allow_joins=allow_joins,
                split_subq=split_subq,
                check_filterable=check_filterable,
                summarize=summarize,
            )
        # A bare conditional expression is compared against True via an
        # exact lookup.
        if hasattr(filter_expr, "resolve_expression"):
            if not getattr(filter_expr, "conditional", False):
                raise TypeError("Cannot filter against a non-conditional expression.")
            condition = filter_expr.resolve_expression(
                self, allow_joins=allow_joins, summarize=summarize
            )
            if not isinstance(condition, Lookup):
                condition = self.build_lookup(["exact"], condition, True)
            return WhereNode([condition], connector=AND), []
        arg, value = filter_expr
        if not arg:
            raise FieldError(f"Cannot parse keyword query {arg!r}")
        lookups, parts, reffed_expression = self.solve_lookup_type(arg, summarize)

        if check_filterable:
            self.check_filterable(reffed_expression)

        if not allow_joins and len(parts) > 1:
            raise FieldError("Joined field references are not permitted in this query")

        # Track which joins resolving the value added (refcount deltas), so
        # they can later be considered for promotion/demotion.
        pre_joins = self.alias_refcount.copy()
        value = self.resolve_lookup_value(value, can_reuse, allow_joins)
        used_joins = {
            k for k, v in self.alias_refcount.items() if v > pre_joins.get(k, 0)
        }

        if check_filterable:
            self.check_filterable(value)

        # The filter references an annotation: build the lookup directly
        # against the annotated expression; no joins are needed.
        if reffed_expression:
            condition = self.build_lookup(lookups, reffed_expression, value)
            return WhereNode([condition], connector=AND), []

        opts = self.get_meta()
        alias = self.get_initial_alias()
        allow_many = not branch_negated or not split_subq

        try:
            join_info = self.setup_joins(
                parts,
                opts,
                alias,
                can_reuse=can_reuse,
                allow_many=allow_many,
                reuse_with_filtered_relation=reuse_with_filtered_relation,
            )

            # Prevent iterator from being consumed by check_related_objects()
            if isinstance(value, Iterator):
                value = list(value)
            self.check_related_objects(join_info.final_field, value, join_info.opts)

            # split_exclude() needs to know which joins were generated for the
            # lookup parts
            self._lookup_joins = join_info.joins
        except MultiJoin as e:
            # setup_joins() refused to create multi-valued joins; handle the
            # filter with a subquery via split_exclude() instead.
            return self.split_exclude(filter_expr, can_reuse, e.names_with_path)

        # Update used_joins before trimming since they are reused to determine
        # which joins could be later promoted to INNER.
        used_joins.update(join_info.joins)
        targets, alias, join_list = self.trim_joins(
            join_info.targets, join_info.joins, join_info.path
        )
        if can_reuse is not None:
            can_reuse.update(join_list)

        if join_info.final_field.is_relation:
            if len(targets) == 1:
                col = self._get_col(targets[0], join_info.final_field, alias)
            else:
                # Multiple target columns: compare with a MultiColSource.
                col = MultiColSource(
                    alias, targets, join_info.targets, join_info.final_field
                )
        else:
            col = self._get_col(targets[0], join_info.final_field, alias)

        condition = self.build_lookup(lookups, col, value)
        lookup_type = condition.lookup_name
        clause = WhereNode([condition], connector=AND)

        require_outer = (
            lookup_type == "isnull" and condition.rhs is True and not current_negated
        )
        if (
            current_negated
            and (lookup_type != "isnull" or condition.rhs is False)
            and condition.rhs is not None
        ):
            require_outer = True
            if lookup_type != "isnull":
                # The condition added here will be SQL like this:
                # NOT (col IS NOT NULL), where the first NOT is added in
                # upper layers of code. The reason for addition is that if col
                # is null, then col != someval will result in SQL "unknown"
                # which isn't the same as in Python. The Python None handling
                # is wanted, and it can be gotten by
                # (col IS NULL OR col != someval)
                #   <=>
                # NOT (col IS NOT NULL AND col = someval).
                if (
                    self.is_nullable(targets[0])
                    or self.alias_map[join_list[-1]].join_type == LOUTER
                ):
                    lookup_class = targets[0].get_lookup("isnull")
                    col = self._get_col(targets[0], join_info.targets[0], alias)
                    clause.add(lookup_class(col, False), AND)
                # If someval is a nullable column, someval IS NOT NULL is
                # added.
                if isinstance(value, Col) and self.is_nullable(value.target):
                    lookup_class = value.target.get_lookup("isnull")
                    clause.add(lookup_class(value, False), AND)
        return clause, used_joins if not require_outer else ()
1432
1433    def add_filter(self, filter_lhs, filter_rhs):
1434        self.add_q(Q((filter_lhs, filter_rhs)))
1435
1436    def add_q(self, q_object):
1437        """
1438        A preprocessor for the internal _add_q(). Responsible for doing final
1439        join promotion.
1440        """
1441        # For join promotion this case is doing an AND for the added q_object
1442        # and existing conditions. So, any existing inner join forces the join
1443        # type to remain inner. Existing outer joins can however be demoted.
1444        # (Consider case where rel_a is LOUTER and rel_a__col=1 is added - if
1445        # rel_a doesn't produce any rows, then the whole condition must fail.
1446        # So, demotion is OK.
1447        existing_inner = {
1448            a for a in self.alias_map if self.alias_map[a].join_type == INNER
1449        }
1450        clause, _ = self._add_q(q_object, self.used_aliases)
1451        if clause:
1452            self.where.add(clause, AND)
1453        self.demote_joins(existing_inner)
1454
1455    def build_where(self, filter_expr):
1456        return self.build_filter(filter_expr, allow_joins=False)[0]
1457
1458    def clear_where(self):
1459        self.where = WhereNode()
1460
1461    def _add_q(
1462        self,
1463        q_object,
1464        used_aliases,
1465        branch_negated=False,
1466        current_negated=False,
1467        allow_joins=True,
1468        split_subq=True,
1469        check_filterable=True,
1470        summarize=False,
1471    ):
1472        """Add a Q-object to the current filter."""
1473        connector = q_object.connector
1474        current_negated ^= q_object.negated
1475        branch_negated = branch_negated or q_object.negated
1476        target_clause = WhereNode(connector=connector, negated=q_object.negated)
1477        joinpromoter = JoinPromoter(
1478            q_object.connector, len(q_object.children), current_negated
1479        )
1480        for child in q_object.children:
1481            child_clause, needed_inner = self.build_filter(
1482                child,
1483                can_reuse=used_aliases,
1484                branch_negated=branch_negated,
1485                current_negated=current_negated,
1486                allow_joins=allow_joins,
1487                split_subq=split_subq,
1488                check_filterable=check_filterable,
1489                summarize=summarize,
1490            )
1491            joinpromoter.add_votes(needed_inner)
1492            if child_clause:
1493                target_clause.add(child_clause, connector)
1494        needed_inner = joinpromoter.update_join_types(self)
1495        return target_clause, needed_inner
1496
1497    def build_filtered_relation_q(
1498        self, q_object, reuse, branch_negated=False, current_negated=False
1499    ):
1500        """Add a FilteredRelation object to the current filter."""
1501        connector = q_object.connector
1502        current_negated ^= q_object.negated
1503        branch_negated = branch_negated or q_object.negated
1504        target_clause = WhereNode(connector=connector, negated=q_object.negated)
1505        for child in q_object.children:
1506            if isinstance(child, Node):
1507                child_clause = self.build_filtered_relation_q(
1508                    child,
1509                    reuse=reuse,
1510                    branch_negated=branch_negated,
1511                    current_negated=current_negated,
1512                )
1513            else:
1514                child_clause, _ = self.build_filter(
1515                    child,
1516                    can_reuse=reuse,
1517                    branch_negated=branch_negated,
1518                    current_negated=current_negated,
1519                    allow_joins=True,
1520                    split_subq=False,
1521                    reuse_with_filtered_relation=True,
1522                )
1523            target_clause.add(child_clause, connector)
1524        return target_clause
1525
1526    def add_filtered_relation(self, filtered_relation, alias):
1527        filtered_relation.alias = alias
1528        lookups = dict(get_children_from_q(filtered_relation.condition))
1529        relation_lookup_parts, relation_field_parts, _ = self.solve_lookup_type(
1530            filtered_relation.relation_name
1531        )
1532        if relation_lookup_parts:
1533            raise ValueError(
1534                "FilteredRelation's relation_name cannot contain lookups "
1535                f"(got {filtered_relation.relation_name!r})."
1536            )
1537        for lookup in chain(lookups):
1538            lookup_parts, lookup_field_parts, _ = self.solve_lookup_type(lookup)
1539            shift = 2 if not lookup_parts else 1
1540            lookup_field_path = lookup_field_parts[:-shift]
1541            for idx, lookup_field_part in enumerate(lookup_field_path):
1542                if len(relation_field_parts) > idx:
1543                    if relation_field_parts[idx] != lookup_field_part:
1544                        raise ValueError(
1545                            "FilteredRelation's condition doesn't support "
1546                            f"relations outside the {filtered_relation.relation_name!r} (got {lookup!r})."
1547                        )
1548                else:
1549                    raise ValueError(
1550                        "FilteredRelation's condition doesn't support nested "
1551                        f"relations deeper than the relation_name (got {lookup!r} for "
1552                        f"{filtered_relation.relation_name!r})."
1553                    )
1554        self._filtered_relations[filtered_relation.alias] = filtered_relation
1555
    def names_to_path(self, names, opts, allow_many=True, fail_on_missing=False):
        """
        Walk the list of names and turns them into PathInfo tuples. A single
        name in 'names' can generate multiple PathInfos (m2m, for example).

        'names' is the path of names to travel, 'opts' is the model Options we
        start the name resolving from, 'allow_many' is as for setup_joins().
        If fail_on_missing is set to True, then a name that can't be resolved
        will generate a FieldError.

        Return a list of PathInfo tuples. In addition return the final field
        (the last used join field) and target (which is a field guaranteed to
        contain the same value as the final field). Finally, return those names
        that weren't found (which are likely transforms and the final lookup).
        """
        path, names_with_path = [], []
        for pos, name in enumerate(names):
            # (name, [PathInfos generated while resolving this name]).
            cur_names_with_path = (name, [])

            field = None
            filtered_relation = None
            try:
                if opts is None:
                    raise FieldDoesNotExist
                field = opts.get_field(name)
            except FieldDoesNotExist:
                if name in self.annotation_select:
                    # The name refers to a selected annotation; resolve
                    # further hops against its output_field.
                    field = self.annotation_select[name].output_field
                elif name in self._filtered_relations and pos == 0:
                    # Only the first name may be a FilteredRelation alias.
                    filtered_relation = self._filtered_relations[name]
                    if LOOKUP_SEP in filtered_relation.relation_name:
                        parts = filtered_relation.relation_name.split(LOOKUP_SEP)
                        filtered_relation_path, field, _, _ = self.names_to_path(
                            parts,
                            opts,
                            allow_many,
                            fail_on_missing,
                        )
                        # All but the last PathInfo belong to the prefix path;
                        # the last hop is re-generated below for this field.
                        path.extend(filtered_relation_path[:-1])
                    else:
                        field = opts.get_field(filtered_relation.relation_name)
            if field is not None:
                # Fields that contain one-to-many relations with a generic
                # model (like a GenericForeignKey) cannot generate reverse
                # relations and therefore cannot be used for reverse querying.
                if field.is_relation and not field.related_model:
                    raise FieldError(
                        f"Field {name!r} does not generate an automatic reverse "
                        "relation and therefore cannot be used for reverse "
                        "querying. If it is a GenericForeignKey, consider "
                        "adding a GenericRelation."
                    )
            else:
                # We didn't find the current field, so move position back
                # one step.
                pos -= 1
                if pos == -1 or fail_on_missing:
                    available = sorted(
                        [
                            *get_field_names_from_opts(opts),
                            *self.annotation_select,
                            *self._filtered_relations,
                        ]
                    )
                    raise FieldError(
                        "Cannot resolve keyword '{}' into field. "
                        "Choices are: {}".format(name, ", ".join(available))
                    )
                break

            if hasattr(field, "path_infos"):
                # Relational field: its PathInfos describe the joins needed.
                if filtered_relation:
                    pathinfos = field.get_path_info(filtered_relation)
                else:
                    pathinfos = field.path_infos
                if not allow_many:
                    # A multi-valued hop is forbidden here; report the partial
                    # path via MultiJoin so callers can split the query.
                    for inner_pos, p in enumerate(pathinfos):
                        if p.m2m:
                            cur_names_with_path[1].extend(pathinfos[0 : inner_pos + 1])
                            names_with_path.append(cur_names_with_path)
                            raise MultiJoin(pos + 1, names_with_path)
                last = pathinfos[-1]
                path.extend(pathinfos)
                final_field = last.join_field
                opts = last.to_opts
                targets = last.target_fields
                cur_names_with_path[1].extend(pathinfos)
                names_with_path.append(cur_names_with_path)
            else:
                # Local non-relational field.
                final_field = field
                targets = (field,)
                if fail_on_missing and pos + 1 != len(names):
                    raise FieldError(
                        f"Cannot resolve keyword {names[pos + 1]!r} into field. Join on '{name}'"
                        " not permitted."
                    )
                break
        # names[pos + 1:] are the unresolved trailing parts (likely transforms
        # and the final lookup).
        return path, final_field, targets, names[pos + 1 :]
1655
    def setup_joins(
        self,
        names,
        opts,
        alias,
        can_reuse=None,
        allow_many=True,
        reuse_with_filtered_relation=False,
    ):
        """
        Compute the necessary table joins for the passage through the fields
        given in 'names'. 'opts' is the Options class for the current model
        (which gives the table we are starting from), 'alias' is the alias for
        the table to start the joining from.

        The 'can_reuse' defines the reverse foreign key joins we can reuse. It
        can be None in which case all joins are reusable or a set of aliases
        that can be reused. Note that non-reverse foreign keys are always
        reusable when using setup_joins().

        The 'reuse_with_filtered_relation' can be used to force 'can_reuse'
        parameter and force the relation on the given connections.

        If 'allow_many' is False, then any reverse foreign key seen will
        generate a MultiJoin exception.

        Return the final field involved in the joins, the target field (used
        for any 'where' constraint), the final 'opts' value, the joins, the
        field path traveled to generate the joins, and a transform function
        that takes a field and alias and is equivalent to `field.get_col(alias)`
        in the simple case but wraps field transforms if they were included in
        names.

        The target field is the field containing the concrete value. Final
        field can be something different, for example foreign key pointing to
        that value. Final field is needed for example in some value
        conversions (convert 'obj' in fk__id=obj to pk val using the foreign
        key field for example).
        """
        joins = [alias]
        # The transform can't be applied yet, as joins must be trimmed later.
        # To avoid making every caller of this method look up transforms
        # directly, compute transforms here and create a partial that converts
        # fields to the appropriate wrapped version.

        def final_transformer(field, alias):
            if not self.alias_cols:
                alias = None
            return field.get_col(alias)

        # Try resolving all the names as fields first. If there's an error,
        # treat trailing names as lookups until a field can be resolved.
        last_field_exception = None
        for pivot in range(len(names), 0, -1):
            try:
                path, final_field, targets, rest = self.names_to_path(
                    names[:pivot],
                    opts,
                    allow_many,
                    fail_on_missing=True,
                )
            except FieldError as exc:
                if pivot == 1:
                    # The first item cannot be a lookup, so it's safe
                    # to raise the field error here.
                    raise
                else:
                    # Remember the error; it is re-raised later if the
                    # trailing names turn out not to be valid transforms.
                    last_field_exception = exc
            else:
                # The transforms are the remaining items that couldn't be
                # resolved into fields.
                transforms = names[pivot:]
                break
        for name in transforms:
            # 'name' and 'previous' are bound as keyword arguments via
            # functools.partial below, avoiding the late-binding closure
            # pitfall across loop iterations.
            def transform(field, alias, *, name, previous):
                try:
                    wrapped = previous(field, alias)
                    return self.try_transform(wrapped, name)
                except FieldError:
                    # FieldError is raised if the transform doesn't exist.
                    if isinstance(final_field, Field) and last_field_exception:
                        raise last_field_exception
                    else:
                        raise

            final_transformer = functools.partial(
                transform, name=name, previous=final_transformer
            )
            final_transformer.has_transforms = True
        # Then, add the path to the query's joins. Note that we can't trim
        # joins at this stage - we will need the information about join type
        # of the trimmed joins.
        for join in path:
            if join.filtered_relation:
                filtered_relation = join.filtered_relation.clone()
                table_alias = filtered_relation.alias
            else:
                filtered_relation = None
                table_alias = None
            opts = join.to_opts
            if join.direct:
                nullable = self.is_nullable(join.join_field)
            else:
                # Reverse joins are always treated as nullable.
                nullable = True
            connection = self.join_class(
                opts.db_table,
                alias,
                table_alias,
                INNER,
                join.join_field,
                nullable,
                filtered_relation=filtered_relation,
            )
            reuse = can_reuse if join.m2m or reuse_with_filtered_relation else None
            alias = self.join(
                connection,
                reuse=reuse,
                reuse_with_filtered_relation=reuse_with_filtered_relation,
            )
            joins.append(alias)
            if filtered_relation:
                # Record the full join path on the cloned filtered relation.
                filtered_relation.path = joins[:]
        return JoinInfo(final_field, targets, opts, joins, path, final_transformer)
1780
1781    def trim_joins(self, targets, joins, path):
1782        """
1783        The 'target' parameter is the final field being joined to, 'joins'
1784        is the full list of join aliases. The 'path' contain the PathInfos
1785        used to create the joins.
1786
1787        Return the final target field and table alias and the new active
1788        joins.
1789
1790        Always trim any direct join if the target column is already in the
1791        previous table. Can't trim reverse joins as it's unknown if there's
1792        anything on the other side of the join.
1793        """
1794        joins = joins[:]
1795        for pos, info in enumerate(reversed(path)):
1796            if len(joins) == 1 or not info.direct:
1797                break
1798            if info.filtered_relation:
1799                break
1800            join_targets = {t.column for t in info.join_field.foreign_related_fields}
1801            cur_targets = {t.column for t in targets}
1802            if not cur_targets.issubset(join_targets):
1803                break
1804            targets_dict = {
1805                r[1].column: r[0]
1806                for r in info.join_field.related_fields
1807                if r[1].column in cur_targets
1808            }
1809            targets = tuple(targets_dict[t.column] for t in targets)
1810            self.unref_alias(joins.pop())
1811        return targets, joins[-1], joins
1812
1813    @classmethod
1814    def _gen_cols(cls, exprs, include_external=False, resolve_refs=True):
1815        for expr in exprs:
1816            if isinstance(expr, Col):
1817                yield expr
1818            elif include_external and callable(
1819                getattr(expr, "get_external_cols", None)
1820            ):
1821                yield from expr.get_external_cols()
1822            elif hasattr(expr, "get_source_expressions"):
1823                if not resolve_refs and isinstance(expr, Ref):
1824                    continue
1825                yield from cls._gen_cols(
1826                    expr.get_source_expressions(),
1827                    include_external=include_external,
1828                    resolve_refs=resolve_refs,
1829                )
1830
1831    @classmethod
1832    def _gen_col_aliases(cls, exprs):
1833        yield from (expr.alias for expr in cls._gen_cols(exprs))
1834
    def resolve_ref(self, name, allow_joins=True, reuse=None, summarize=False):
        """
        Resolve a LOOKUP_SEP-separated name into an expression usable in this
        query: an existing annotation (wrapped in a Ref when summarizing), an
        annotation with transforms applied, or a column reached by setting up
        the necessary joins.

        Raise FieldError when the reference requires a join but joins are not
        allowed, when a summarized name is not in annotation_select, or when
        the name resolves to a multicolumn field.
        """
        annotation = self.annotations.get(name)
        if annotation is not None:
            if not allow_joins:
                # Reject annotation references that would require a JOIN when
                # joins are not permitted in this context.
                for alias in self._gen_col_aliases([annotation]):
                    if isinstance(self.alias_map[alias], Join):
                        raise FieldError(
                            "Joined field references are not permitted in this query"
                        )
            if summarize:
                # Summarize currently means we are doing an aggregate() query
                # which is executed as a wrapped subquery if any of the
                # aggregate() elements reference an existing annotation. In
                # that case we need to return a Ref to the subquery's annotation.
                if name not in self.annotation_select:
                    raise FieldError(
                        f"Cannot aggregate over the '{name}' alias. Use annotate() "
                        "to promote it."
                    )
                return Ref(name, self.annotation_select[name])
            else:
                return annotation
        else:
            field_list = name.split(LOOKUP_SEP)
            annotation = self.annotations.get(field_list[0])
            if annotation is not None:
                # The first part names an annotation; the remaining parts are
                # transforms applied on top of it.
                for transform in field_list[1:]:
                    annotation = self.try_transform(annotation, transform)
                return annotation
            join_info = self.setup_joins(
                field_list, self.get_meta(), self.get_initial_alias(), can_reuse=reuse
            )
            targets, final_alias, join_list = self.trim_joins(
                join_info.targets, join_info.joins, join_info.path
            )
            if not allow_joins and len(join_list) > 1:
                raise FieldError(
                    "Joined field references are not permitted in this query"
                )
            if len(targets) > 1:
                raise FieldError(
                    "Referencing multicolumn fields with F() objects isn't supported"
                )
            # Verify that the last lookup in name is a field or a transform:
            # transform_function() raises FieldError if not.
            transform = join_info.transform_function(targets[0], final_alias)
            if reuse is not None:
                reuse.update(join_list)
            return transform
1884
    def split_exclude(self, filter_expr, can_reuse, names_with_path):
        """
        When doing an exclude against any kind of N-to-many relation, we need
        to use a subquery. This method constructs the nested query, given the
        original exclude filter (filter_expr) and the portion up to the first
        N-to-many relation field.

        For example, if the origin filter is ~Q(child__name='foo'), filter_expr
        is ('child__name', 'foo') and can_reuse is a set of joins usable for
        filters in the original query.

        We will turn this into equivalent of:
            WHERE NOT EXISTS(
                SELECT 1
                FROM child
                WHERE name = 'foo' AND child.parent_id = parent.id
                LIMIT 1
            )
        """
        # Generate the inner query.
        query = self.__class__(self.model)
        query._filtered_relations = self._filtered_relations
        filter_lhs, filter_rhs = filter_expr
        if isinstance(filter_rhs, OuterRef):
            # Re-wrap so the reference resolves against the outer query.
            filter_rhs = OuterRef(filter_rhs)
        elif isinstance(filter_rhs, F):
            # An F() in the outer query becomes an OuterRef in the subquery.
            filter_rhs = OuterRef(filter_rhs.name)
        query.add_filter(filter_lhs, filter_rhs)
        query.clear_ordering(force=True)
        # Try to have as simple as possible subquery -> trim leading joins from
        # the subquery.
        trimmed_prefix, contains_louter = query.trim_start(names_with_path)

        col = query.select[0]
        select_field = col.target
        alias = col.alias
        if alias in can_reuse:
            id_field = select_field.model._meta.get_field("id")
            # Need to add a restriction so that outer query's filters are in effect for
            # the subquery, too.
            query.bump_prefix(self)
            lookup_class = select_field.get_lookup("exact")
            # Note that the query.select[0].alias is different from alias
            # due to bump_prefix above.
            lookup = lookup_class(
                id_field.get_col(query.select[0].alias), id_field.get_col(alias)
            )
            query.where.add(lookup, AND)
            query.external_aliases[alias] = True

        # Correlate the subquery's select column with the trimmed prefix in
        # the outer query.
        lookup_class = select_field.get_lookup("exact")
        lookup = lookup_class(col, ResolvedOuterRef(trimmed_prefix))
        query.where.add(lookup, AND)
        condition, needed_inner = self.build_filter(Exists(query))

        if contains_louter:
            # A LEFT OUTER join in the trimmed prefix means NULL rows must
            # still match the exclude; OR in an IS NULL check on the prefix.
            or_null_condition, _ = self.build_filter(
                (f"{trimmed_prefix}__isnull", True),
                current_negated=True,
                branch_negated=True,
                can_reuse=can_reuse,
            )
            condition.add(or_null_condition, OR)
            # Note that the end result will be:
            # (outercol NOT IN innerq AND outercol IS NOT NULL) OR outercol IS NULL.
            # This might look crazy but due to how IN works, this seems to be
            # correct. If the IS NOT NULL check is removed then outercol NOT
            # IN will return UNKNOWN. If the IS NULL check is removed, then if
            # outercol IS NULL we will not match the row.
        return condition, needed_inner
1955
1956    def set_empty(self):
1957        self.where.add(NothingNode(), AND)
1958        for query in self.combined_queries:
1959            query.set_empty()
1960
1961    def is_empty(self):
1962        return any(isinstance(c, NothingNode) for c in self.where.children)
1963
1964    def set_limits(self, low=None, high=None):
1965        """
1966        Adjust the limits on the rows retrieved. Use low/high to set these,
1967        as it makes it more Pythonic to read and write. When the SQL query is
1968        created, convert them to the appropriate offset and limit values.
1969
1970        Apply any limits passed in here to the existing constraints. Add low
1971        to the current low value and clamp both to any existing high value.
1972        """
1973        if high is not None:
1974            if self.high_mark is not None:
1975                self.high_mark = min(self.high_mark, self.low_mark + high)
1976            else:
1977                self.high_mark = self.low_mark + high
1978        if low is not None:
1979            if self.high_mark is not None:
1980                self.low_mark = min(self.high_mark, self.low_mark + low)
1981            else:
1982                self.low_mark = self.low_mark + low
1983
1984        if self.low_mark == self.high_mark:
1985            self.set_empty()
1986
1987    def clear_limits(self):
1988        """Clear any existing limits."""
1989        self.low_mark, self.high_mark = 0, None
1990
1991    @property
1992    def is_sliced(self):
1993        return self.low_mark != 0 or self.high_mark is not None
1994
1995    def has_limit_one(self):
1996        return self.high_mark is not None and (self.high_mark - self.low_mark) == 1
1997
1998    def can_filter(self):
1999        """
2000        Return True if adding filters to this instance is still possible.
2001
2002        Typically, this means no limits or offsets have been put on the results.
2003        """
2004        return not self.is_sliced
2005
2006    def clear_select_clause(self):
2007        """Remove all fields from SELECT clause."""
2008        self.select = ()
2009        self.default_cols = False
2010        self.select_related = False
2011        self.set_extra_mask(())
2012        self.set_annotation_mask(())
2013
2014    def clear_select_fields(self):
2015        """
2016        Clear the list of fields to select (but not extra_select columns).
2017        Some queryset types completely replace any existing list of select
2018        columns.
2019        """
2020        self.select = ()
2021        self.values_select = ()
2022
2023    def add_select_col(self, col, name):
2024        self.select += (col,)
2025        self.values_select += (name,)
2026
2027    def set_select(self, cols):
2028        self.default_cols = False
2029        self.select = tuple(cols)
2030
2031    def add_distinct_fields(self, *field_names):
2032        """
2033        Add and resolve the given fields to the query's "distinct on" clause.
2034        """
2035        self.distinct_fields = field_names
2036        self.distinct = True
2037
    def add_fields(self, field_names, allow_m2m=True):
        """
        Add the given (model) fields to the select set. Add the field names in
        the order specified.

        Raise FieldError for unresolvable names, and for multi-valued (m2m)
        paths when allow_m2m is False.
        """
        alias = self.get_initial_alias()
        opts = self.get_meta()

        try:
            cols = []
            for name in field_names:
                # Join promotion note - we must not remove any rows here, so
                # if there is no existing joins, use outer join.
                join_info = self.setup_joins(
                    name.split(LOOKUP_SEP), opts, alias, allow_many=allow_m2m
                )
                targets, final_alias, joins = self.trim_joins(
                    join_info.targets,
                    join_info.joins,
                    join_info.path,
                )
                for target in targets:
                    cols.append(join_info.transform_function(target, final_alias))
            if cols:
                self.set_select(cols)
        # The except blocks rely on 'name' still being bound to the failing
        # field name from the loop above.
        except MultiJoin:
            raise FieldError(f"Invalid field name: '{name}'")
        except FieldError:
            if LOOKUP_SEP in name:
                # For lookups spanning over relationships, show the error
                # from the model on which the lookup failed.
                raise
            elif name in self.annotations:
                raise FieldError(
                    f"Cannot select the '{name}' alias. Use annotate() to promote it."
                )
            else:
                # Build a helpful message listing every selectable name.
                names = sorted(
                    [
                        *get_field_names_from_opts(opts),
                        *self.extra,
                        *self.annotation_select,
                        *self._filtered_relations,
                    ]
                )
                raise FieldError(
                    "Cannot resolve keyword {!r} into field. Choices are: {}".format(
                        name, ", ".join(names)
                    )
                )
2088
    def add_ordering(self, *ordering):
        """
        Add items from the 'ordering' sequence to the query's "order by"
        clause. These items are either field names (not column names) --
        possibly with a direction prefix ('-' or '?') -- or OrderBy
        expressions.

        If 'ordering' is empty, clear all ordering from the query.

        Raise FieldError for invalid items and for aggregates that were not
        promoted via annotate().
        """
        errors = []
        for item in ordering:
            if isinstance(item, str):
                if item == "?":
                    # Random ordering needs no field validation.
                    continue
                item = item.removeprefix("-")
                if item in self.annotations:
                    continue
                if self.extra and item in self.extra:
                    continue
                # names_to_path() validates the lookup. A descriptive
                # FieldError will be raised if it's not.
                self.names_to_path(item.split(LOOKUP_SEP), self.model._meta)
            elif not hasattr(item, "resolve_expression"):
                # Neither a string nor an expression: collect and report below.
                errors.append(item)
            if getattr(item, "contains_aggregate", False):
                raise FieldError(
                    "Using an aggregate in order_by() without also including "
                    f"it in annotate() is not allowed: {item}"
                )
        if errors:
            raise FieldError(f"Invalid order_by arguments: {errors}")
        if ordering:
            self.order_by += ordering
        else:
            # An empty call also disables the model's default ordering.
            self.default_ordering = False
2124
2125    def clear_ordering(self, force=False, clear_default=True):
2126        """
2127        Remove any ordering settings if the current query allows it without
2128        side effects, set 'force' to True to clear the ordering regardless.
2129        If 'clear_default' is True, there will be no ordering in the resulting
2130        query (not even the model's default).
2131        """
2132        if not force and (
2133            self.is_sliced or self.distinct_fields or self.select_for_update
2134        ):
2135            return
2136        self.order_by = ()
2137        self.extra_order_by = ()
2138        if clear_default:
2139            self.default_ordering = False
2140
2141    def set_group_by(self, allow_aliases=True):
2142        """
2143        Expand the GROUP BY clause required by the query.
2144
2145        This will usually be the set of all non-aggregate fields in the
2146        return data. If the database backend supports grouping by the
2147        primary key, and the query would be equivalent, the optimization
2148        will be made automatically.
2149        """
2150        if allow_aliases and self.values_select:
2151            # If grouping by aliases is allowed assign selected value aliases
2152            # by moving them to annotations.
2153            group_by_annotations = {}
2154            values_select = {}
2155            for alias, expr in zip(self.values_select, self.select):
2156                if isinstance(expr, Col):
2157                    values_select[alias] = expr
2158                else:
2159                    group_by_annotations[alias] = expr
2160            self.annotations = {**group_by_annotations, **self.annotations}
2161            self.append_annotation_mask(group_by_annotations)
2162            self.select = tuple(values_select.values())
2163            self.values_select = tuple(values_select)
2164        group_by = list(self.select)
2165        for alias, annotation in self.annotation_select.items():
2166            if not (group_by_cols := annotation.get_group_by_cols()):
2167                continue
2168            if allow_aliases and not annotation.contains_aggregate:
2169                group_by.append(Ref(alias, annotation))
2170            else:
2171                group_by.extend(group_by_cols)
2172        self.group_by = tuple(group_by)
2173
2174    def add_select_related(self, fields):
2175        """
2176        Set up the select_related data structure so that we only select
2177        certain related models (as opposed to all models, when
2178        self.select_related=True).
2179        """
2180        if isinstance(self.select_related, bool):
2181            field_dict = {}
2182        else:
2183            field_dict = self.select_related
2184        for field in fields:
2185            d = field_dict
2186            for part in field.split(LOOKUP_SEP):
2187                d = d.setdefault(part, {})
2188        self.select_related = field_dict
2189
2190    def add_extra(self, select, select_params, where, params, tables, order_by):
2191        """
2192        Add data to the various extra_* attributes for user-created additions
2193        to the query.
2194        """
2195        if select:
2196            # We need to pair any placeholder markers in the 'select'
2197            # dictionary with their parameters in 'select_params' so that
2198            # subsequent updates to the select dictionary also adjust the
2199            # parameters appropriately.
2200            select_pairs = {}
2201            if select_params:
2202                param_iter = iter(select_params)
2203            else:
2204                param_iter = iter([])
2205            for name, entry in select.items():
2206                self.check_alias(name)
2207                entry = str(entry)
2208                entry_params = []
2209                pos = entry.find("%s")
2210                while pos != -1:
2211                    if pos == 0 or entry[pos - 1] != "%":
2212                        entry_params.append(next(param_iter))
2213                    pos = entry.find("%s", pos + 2)
2214                select_pairs[name] = (entry, entry_params)
2215            self.extra.update(select_pairs)
2216        if where or params:
2217            self.where.add(ExtraWhere(where, params), AND)
2218        if tables:
2219            self.extra_tables += tuple(tables)
2220        if order_by:
2221            self.extra_order_by = order_by
2222
2223    def clear_deferred_loading(self):
2224        """Remove any fields from the deferred loading set."""
2225        self.deferred_loading = (frozenset(), True)
2226
2227    def add_deferred_loading(self, field_names):
2228        """
2229        Add the given list of model field names to the set of fields to
2230        exclude from loading from the database when automatic column selection
2231        is done. Add the new field names to any existing field names that
2232        are deferred (or removed from any existing field names that are marked
2233        as the only ones for immediate loading).
2234        """
2235        # Fields on related models are stored in the literal double-underscore
2236        # format, so that we can use a set datastructure. We do the foo__bar
2237        # splitting and handling when computing the SQL column names (as part of
2238        # get_columns()).
2239        existing, defer = self.deferred_loading
2240        if defer:
2241            # Add to existing deferred names.
2242            self.deferred_loading = existing.union(field_names), True
2243        else:
2244            # Remove names from the set of any existing "immediate load" names.
2245            if new_existing := existing.difference(field_names):
2246                self.deferred_loading = new_existing, False
2247            else:
2248                self.clear_deferred_loading()
2249                if new_only := set(field_names).difference(existing):
2250                    self.deferred_loading = new_only, True
2251
2252    def add_immediate_loading(self, field_names):
2253        """
2254        Add the given list of model field names to the set of fields to
2255        retrieve when the SQL is executed ("immediate loading" fields). The
2256        field names replace any existing immediate loading field names. If
2257        there are field names already specified for deferred loading, remove
2258        those names from the new field_names before storing the new names
2259        for immediate loading. (That is, immediate loading overrides any
2260        existing immediate values, but respects existing deferrals.)
2261        """
2262        existing, defer = self.deferred_loading
2263        field_names = set(field_names)
2264
2265        if defer:
2266            # Remove any existing deferred names from the current set before
2267            # setting the new names.
2268            self.deferred_loading = field_names.difference(existing), False
2269        else:
2270            # Replace any existing "immediate load" field names.
2271            self.deferred_loading = frozenset(field_names), False
2272
2273    def set_annotation_mask(self, names):
2274        """Set the mask of annotations that will be returned by the SELECT."""
2275        if names is None:
2276            self.annotation_select_mask = None
2277        else:
2278            self.annotation_select_mask = set(names)
2279        self._annotation_select_cache = None
2280
2281    def append_annotation_mask(self, names):
2282        if self.annotation_select_mask is not None:
2283            self.set_annotation_mask(self.annotation_select_mask.union(names))
2284
2285    def set_extra_mask(self, names):
2286        """
2287        Set the mask of extra select items that will be returned by SELECT.
2288        Don't remove them from the Query since they might be used later.
2289        """
2290        if names is None:
2291            self.extra_select_mask = None
2292        else:
2293            self.extra_select_mask = set(names)
2294        self._extra_select_cache = None
2295
    def set_values(self, fields):
        """
        Configure the query to return the given names as plain values.

        ``fields`` may mix concrete field names, extra-select aliases, and
        annotation aliases; an empty/None ``fields`` selects every concrete
        field on the model. Resets select-related and deferred loading, then
        rebuilds the annotation/extra masks and, if needed, the GROUP BY.
        """
        self.select_related = False
        self.clear_deferred_loading()
        self.clear_select_fields()
        self.has_select_fields = True

        if fields:
            field_names = []
            extra_names = []
            annotation_names = []
            if not self.extra and not self.annotations:
                # Shortcut - if there are no extra or annotations, then
                # the values() clause must be just field names.
                field_names = list(fields)
            else:
                self.default_cols = False
                # Route each requested name into the bucket it belongs to:
                # extra select, annotation, or plain model field.
                for f in fields:
                    if f in self.extra_select:
                        extra_names.append(f)
                    elif f in self.annotation_select:
                        annotation_names.append(f)
                    else:
                        field_names.append(f)
            self.set_extra_mask(extra_names)
            self.set_annotation_mask(annotation_names)
            selected = frozenset(field_names + extra_names + annotation_names)
        else:
            field_names = [f.attname for f in self.model._meta.concrete_fields]
            selected = frozenset(field_names)
        # Selected annotations must be known before setting the GROUP BY
        # clause.
        if self.group_by is True:
            self.add_fields(
                (f.attname for f in self.model._meta.concrete_fields), False
            )
            # Disable GROUP BY aliases to avoid orphaning references to the
            # SELECT clause which is about to be cleared.
            self.set_group_by(allow_aliases=False)
            self.clear_select_fields()
        elif self.group_by:
            # Resolve GROUP BY annotation references if they are not part of
            # the selected fields anymore.
            group_by = []
            for expr in self.group_by:
                if isinstance(expr, Ref) and expr.refs not in selected:
                    expr = self.annotations[expr.refs]
                group_by.append(expr)
            self.group_by = tuple(group_by)

        self.values_select = tuple(field_names)
        self.add_fields(field_names, True)
2347
2348    @property
2349    def annotation_select(self):
2350        """
2351        Return the dictionary of aggregate columns that are not masked and
2352        should be used in the SELECT clause. Cache this result for performance.
2353        """
2354        if self._annotation_select_cache is not None:
2355            return self._annotation_select_cache
2356        elif not self.annotations:
2357            return {}
2358        elif self.annotation_select_mask is not None:
2359            self._annotation_select_cache = {
2360                k: v
2361                for k, v in self.annotations.items()
2362                if k in self.annotation_select_mask
2363            }
2364            return self._annotation_select_cache
2365        else:
2366            return self.annotations
2367
2368    @property
2369    def extra_select(self):
2370        if self._extra_select_cache is not None:
2371            return self._extra_select_cache
2372        if not self.extra:
2373            return {}
2374        elif self.extra_select_mask is not None:
2375            self._extra_select_cache = {
2376                k: v for k, v in self.extra.items() if k in self.extra_select_mask
2377            }
2378            return self._extra_select_cache
2379        else:
2380            return self.extra
2381
    def trim_start(self, names_with_path):
        """
        Trim joins from the start of the join path. The candidates for trim
        are the PathInfos in names_with_path structure that are m2m joins.

        Also set the select column so the start matches the join.

        This method is meant to be used for generating the subquery joins &
        cols in split_exclude().

        Return a lookup usable for doing outerq.filter(lookup=self) and a
        boolean indicating if the joins in the prefix contain a LEFT OUTER join.
        """
        all_paths = []
        for _, paths in names_with_path:
            all_paths.extend(paths)
        contains_louter = False
        # Trim and operate only on tables that were generated for
        # the lookup part of the query. That is, avoid trimming
        # joins generated for F() expressions.
        lookup_tables = [
            t for t in self.alias_map if t in self._lookup_joins or t == self.base_table
        ]
        for trimmed_paths, path in enumerate(all_paths):
            if path.m2m:
                break
            if self.alias_map[lookup_tables[trimmed_paths + 1]].join_type == LOUTER:
                contains_louter = True
            alias = lookup_tables[trimmed_paths]
            self.unref_alias(alias)
        # NOTE(review): trimmed_paths and path deliberately leak out of the
        # loop above; they refer to the first m2m PathInfo (or the last path
        # if none is m2m). If all_paths were empty they would be unbound --
        # callers apparently guarantee at least one path; confirm.
        # The path.join_field is a Rel, lets get the other side's field
        join_field = path.join_field.field
        # Build the filter prefix.
        paths_in_prefix = trimmed_paths
        trimmed_prefix = []
        for name, path in names_with_path:
            if paths_in_prefix - len(path) < 0:
                break
            trimmed_prefix.append(name)
            paths_in_prefix -= len(path)
        trimmed_prefix.append(join_field.foreign_related_fields[0].name)
        trimmed_prefix = LOOKUP_SEP.join(trimmed_prefix)
        # Lets still see if we can trim the first join from the inner query
        # (that is, self). We can't do this for:
        # - LEFT JOINs because we would miss those rows that have nothing on
        #   the outer side,
        # - INNER JOINs from filtered relations because we would miss their
        #   filters.
        first_join = self.alias_map[lookup_tables[trimmed_paths + 1]]
        if first_join.join_type != LOUTER and not first_join.filtered_relation:
            select_fields = [r[0] for r in join_field.related_fields]
            select_alias = lookup_tables[trimmed_paths + 1]
            self.unref_alias(lookup_tables[trimmed_paths])
            extra_restriction = join_field.get_extra_restriction(
                None, lookup_tables[trimmed_paths + 1]
            )
            if extra_restriction:
                self.where.add(extra_restriction, AND)
        else:
            # TODO: It might be possible to trim more joins from the start of the
            # inner query if it happens to have a longer join chain containing the
            # values in select_fields. Lets punt this one for now.
            select_fields = [r[1] for r in join_field.related_fields]
            select_alias = lookup_tables[trimmed_paths]
        # The found starting point is likely a join_class instead of a
        # base_table_class reference. But the first entry in the query's FROM
        # clause must not be a JOIN.
        for table in self.alias_map:
            if self.alias_refcount[table] > 0:
                self.alias_map[table] = self.base_table_class(
                    self.alias_map[table].table_name,
                    table,
                )
                break
        self.set_select([f.get_col(select_alias) for f in select_fields])
        return trimmed_prefix, contains_louter
2458
2459    def is_nullable(self, field):
2460        """Check if the given field should be treated as nullable."""
2461        # QuerySet does not have knowledge of which connection is going to be
2462        # used. For the single-database setup we always reference the default
2463        # connection here.
2464        return field.allow_null
2465
2466
def get_order_dir(field, default="ASC"):
    """
    Return the field name and direction for an order specification. For
    example, '-foo' is returned as ('foo', 'DESC').

    The 'default' param is used to indicate which way no prefix (or a '+'
    prefix) should sort. The '-' prefix always sorts the opposite way.
    """
    direction = ORDER_DIR[default]
    # Index the first character directly (rather than startswith) to keep
    # the original behavior for every input.
    flipped = field[0] == "-"
    return (field[1:], direction[1]) if flipped else (field, direction[0])
2479
2480
class JoinPromoter:
    """
    A class to abstract away join promotion problems for complex filter
    conditions.
    """

    def __init__(self, connector, num_children, negated):
        self.connector = connector
        self.negated = negated
        # Under negation, De Morgan flips the connector for join-promotion
        # purposes: NOT (a AND b) behaves like (NOT a OR NOT b).
        if negated:
            self.effective_connector = OR if connector == AND else AND
        else:
            self.effective_connector = connector
        self.num_children = num_children
        # Per-alias tally of how many child conditions require the join for
        # inner and/or outer join decisions.
        self.votes = Counter()

    def __repr__(self):
        return (
            f"{self.__class__.__qualname__}(connector={self.connector!r}, "
            f"num_children={self.num_children!r}, negated={self.negated!r})"
        )

    def add_votes(self, votes):
        """
        Add single vote per item to self.votes. Parameter can be any
        iterable.
        """
        self.votes.update(votes)

    def update_join_types(self, query):
        """
        Change join types so that the generated query is as efficient as
        possible, but still correct. So, change as many joins as possible
        to INNER, but don't make OUTER joins INNER if that could remove
        results from the query.
        """
        to_promote = set()
        to_demote = set()
        for table, votes in self.votes.items():
            # OR case: unless every child references the join, an INNER JOIN
            # could itself remove valid rows. E.g. for rel_a__col=1 |
            # rel_b__col=2, if the rel_a join produces no row (reverse FK or
            # NULL direct FK) while rel_b matches, an INNER join to rel_a
            # would drop a valid result — so promote to LOUTER (this may be
            # demoted again below).
            if self.effective_connector == OR and votes < self.num_children:
                to_promote.add(table)
            # AND case: a single child that can only match when the join
            # produces a row makes INNER safe (NULL = anything is never
            # true). OR case: if *all* children voted for the join, a missing
            # row fails every child, so INNER is safe there as well, e.g.
            # rel_a__col__icontains=Alex | rel_a__col__icontains=Russell.
            demotable = self.effective_connector == AND or (
                self.effective_connector == OR and votes == self.num_children
            )
            if demotable:
                to_demote.add(table)
            # Promotion followed by demotion is fine. Consider
            #    (rel_a__col=1 | rel_b__col=2) & rel_a__col__gte=0:
            # the OR clause promotes rel_a and rel_b to LOUTER, then the
            # AND-side vote for rel_a demotes it back to INNER. That is
            # correct: if rel_a produces no row, rel_a__col__gte=0 cannot
            # hold and the whole condition is false. The same reasoning
            # applies with the clauses swapped, or with the __gte clause
            # replaced by rel_a__col=1 | rel_a__col=2.
        query.promote_joins(to_promote)
        query.demote_joins(to_demote)
        return to_demote