
   1"""
   2Create SQL statements for QuerySets.
   3
   4The code in here encapsulates all of the SQL construction so that QuerySets
   5themselves do not have to (and could be backed by things other than SQL
   6databases). The abstraction barrier only works one way: this module has to know
   7all about the internals of models in order to get the information it needs.
   8"""
   9
  10import copy
  11import difflib
  12import functools
  13import sys
  14from collections import Counter, namedtuple
  15from collections.abc import Iterator, Mapping
  16from functools import cached_property
  17from itertools import chain, count, product
  18from string import ascii_uppercase
  19
  20from plain.exceptions import FieldDoesNotExist, FieldError
  21from plain.models.aggregates import Count
  22from plain.models.constants import LOOKUP_SEP
  23from plain.models.db import NotSupportedError, db_connection
  24from plain.models.expressions import (
  25    BaseExpression,
  26    Col,
  27    Exists,
  28    F,
  29    OuterRef,
  30    Ref,
  31    ResolvedOuterRef,
  32    Value,
  33)
  34from plain.models.fields import Field
  35from plain.models.fields.related_lookups import MultiColSource
  36from plain.models.lookups import Lookup
  37from plain.models.query_utils import (
  38    Q,
  39    check_rel_lookup_compatibility,
  40    refs_expression,
  41)
  42from plain.models.sql.constants import INNER, LOUTER, ORDER_DIR, SINGLE
  43from plain.models.sql.datastructures import BaseTable, Empty, Join, MultiJoin
  44from plain.models.sql.where import AND, OR, ExtraWhere, NothingNode, WhereNode
  45from plain.utils.regex_helper import _lazy_re_compile
  46from plain.utils.tree import Node
  47
# Public API of this module.
__all__ = ["Query", "RawQuery"]

# Quotation marks ('"`[]), whitespace characters, semicolons, or inline
# SQL comments are forbidden in column aliases.
FORBIDDEN_ALIAS_PATTERN = _lazy_re_compile(r"['`\"\]\[;\s]|--|/\*|\*/")

# Inspired from
# https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
# Used by Query.explain() to validate user-supplied EXPLAIN option names.
EXPLAIN_OPTIONS_PATTERN = _lazy_re_compile(r"[\w\-]+")
  57
  58
  59def get_field_names_from_opts(opts):
  60    if opts is None:
  61        return set()
  62    return set(
  63        chain.from_iterable(
  64            (f.name, f.attname) if f.concrete else (f.name,) for f in opts.get_fields()
  65        )
  66    )
  67
  68
  69def get_children_from_q(q):
  70    for child in q.children:
  71        if isinstance(child, Node):
  72            yield from get_children_from_q(child)
  73        else:
  74            yield child
  75
  76
  77JoinInfo = namedtuple(
  78    "JoinInfo",
  79    ("final_field", "targets", "opts", "joins", "path", "transform_function"),
  80)
  81
  82
  83class RawQuery:
  84    """A single raw SQL query."""
  85
  86    def __init__(self, sql, params=()):
  87        self.params = params
  88        self.sql = sql
  89        self.cursor = None
  90
  91        # Mirror some properties of a normal query so that
  92        # the compiler can be used to process results.
  93        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
  94        self.extra_select = {}
  95        self.annotation_select = {}
  96
  97    def chain(self):
  98        return self.clone()
  99
 100    def clone(self):
 101        return RawQuery(self.sql, params=self.params)
 102
 103    def get_columns(self):
 104        if self.cursor is None:
 105            self._execute_query()
 106        converter = db_connection.introspection.identifier_converter
 107        return [converter(column_meta[0]) for column_meta in self.cursor.description]
 108
 109    def __iter__(self):
 110        # Always execute a new query for a new iterator.
 111        # This could be optimized with a cache at the expense of RAM.
 112        self._execute_query()
 113        if not db_connection.features.can_use_chunked_reads:
 114            # If the database can't use chunked reads we need to make sure we
 115            # evaluate the entire query up front.
 116            result = list(self.cursor)
 117        else:
 118            result = self.cursor
 119        return iter(result)
 120
 121    def __repr__(self):
 122        return f"<{self.__class__.__name__}: {self}>"
 123
 124    @property
 125    def params_type(self):
 126        if self.params is None:
 127            return None
 128        return dict if isinstance(self.params, Mapping) else tuple
 129
 130    def __str__(self):
 131        if self.params_type is None:
 132            return self.sql
 133        return self.sql % self.params_type(self.params)
 134
 135    def _execute_query(self):
 136        # Adapt parameters to the database, as much as possible considering
 137        # that the target type isn't known. See #17755.
 138        params_type = self.params_type
 139        adapter = db_connection.ops.adapt_unknown_value
 140        if params_type is tuple:
 141            params = tuple(adapter(val) for val in self.params)
 142        elif params_type is dict:
 143            params = {key: adapter(val) for key, val in self.params.items()}
 144        elif params_type is None:
 145            params = None
 146        else:
 147            raise RuntimeError(f"Unexpected params type: {params_type}")
 148
 149        self.cursor = db_connection.cursor()
 150        self.cursor.execute(self.sql, params)
 151
 152
 153ExplainInfo = namedtuple("ExplainInfo", ("format", "options"))
 154
 155
class Query(BaseExpression):
    """A single SQL query."""

    # Prefix used when generating table aliases (T1, T2, ...).
    alias_prefix = "T"
    # Value substituted per-expression when the result set is known empty.
    empty_result_set_value = None
    # Alias prefixes known to belong to subqueries (seeded with the default;
    # merged with the rhs's in combine()).
    subq_aliases = frozenset([alias_prefix])

    # Name of the compiler class, resolved via db_connection.ops.compiler().
    compiler = "SQLCompiler"

    # Datastructure classes used when building the FROM clause.
    base_table_class = BaseTable
    join_class = Join

    # Whether the default model columns are selected when no explicit select
    # has been configured.
    default_cols = True
    default_ordering = True
    standard_ordering = True

    # When True, used_aliases is preserved across chain() calls.
    filter_is_sticky = False
    # True when this query is embedded as a subquery of another query.
    subquery = False

    # SQL-related attributes.
    # Select and related select clauses are expressions to use in the SELECT
    # clause of the query. The select is used for cases where we want to set up
    # the select clause to contain other than default fields (values(),
    # subqueries...). Note that annotations go to annotations dictionary.
    select = ()
    # The group_by attribute can have one of the following forms:
    #  - None: no group by at all in the query
    #  - A tuple of expressions: group by (at least) those expressions.
    #    String refs are also allowed for now.
    #  - True: group by all select fields of the model
    # See compiler.get_group_by() for details.
    group_by = None
    order_by = ()
    low_mark = 0  # Used for offset/limit.
    high_mark = None  # Used for offset/limit.
    distinct = False
    distinct_fields = ()
    select_for_update = False
    select_for_update_nowait = False
    select_for_update_skip_locked = False
    select_for_update_of = ()
    select_for_no_key_update = False
    select_related = False
    has_select_fields = False
    # Arbitrary limit for select_related to prevents infinite recursion.
    max_depth = 5
    # Holds the selects defined by a call to values() or values_list()
    # excluding annotation_select and extra_select.
    values_select = ()

    # SQL annotation-related attributes.
    annotation_select_mask = None
    _annotation_select_cache = None

    # Set combination attributes.
    combinator = None
    combinator_all = False
    combined_queries = ()

    # These are for extensions. The contents are more or less appended verbatim
    # to the appropriate clause.
    extra_select_mask = None
    _extra_select_cache = None

    extra_tables = ()
    extra_order_by = ()

    # A tuple that is a set of model field names and either True, if these are
    # the fields to defer, or False if these are the only fields to load.
    deferred_loading = (frozenset(), True)

    # ExplainInfo instance when explain() has been requested, else None.
    explain_info = None
 228
 229    def __init__(self, model, alias_cols=True):
 230        self.model = model
 231        self.alias_refcount = {}
 232        # alias_map is the most important data structure regarding joins.
 233        # It's used for recording which joins exist in the query and what
 234        # types they are. The key is the alias of the joined table (possibly
 235        # the table name) and the value is a Join-like object (see
 236        # sql.datastructures.Join for more information).
 237        self.alias_map = {}
 238        # Whether to provide alias to columns during reference resolving.
 239        self.alias_cols = alias_cols
 240        # Sometimes the query contains references to aliases in outer queries (as
 241        # a result of split_exclude). Correct alias quoting needs to know these
 242        # aliases too.
 243        # Map external tables to whether they are aliased.
 244        self.external_aliases = {}
 245        self.table_map = {}  # Maps table names to list of aliases.
 246        self.used_aliases = set()
 247
 248        self.where = WhereNode()
 249        # Maps alias -> Annotation Expression.
 250        self.annotations = {}
 251        # These are for extensions. The contents are more or less appended
 252        # verbatim to the appropriate clause.
 253        self.extra = {}  # Maps col_alias -> (col_sql, params).
 254
 255        self._filtered_relations = {}
 256
 257    @property
 258    def output_field(self):
 259        if len(self.select) == 1:
 260            select = self.select[0]
 261            return getattr(select, "target", None) or select.field
 262        elif len(self.annotation_select) == 1:
 263            return next(iter(self.annotation_select.values())).output_field
 264
 265    @cached_property
 266    def base_table(self):
 267        for alias in self.alias_map:
 268            return alias
 269
 270    def __str__(self):
 271        """
 272        Return the query as a string of SQL with the parameter values
 273        substituted in (use sql_with_params() to see the unsubstituted string).
 274
 275        Parameter values won't necessarily be quoted correctly, since that is
 276        done by the database interface at execution time.
 277        """
 278        sql, params = self.sql_with_params()
 279        return sql % params
 280
 281    def sql_with_params(self):
 282        """
 283        Return the query as an SQL string and the parameters that will be
 284        substituted into the query.
 285        """
 286        return self.get_compiler().as_sql()
 287
 288    def __deepcopy__(self, memo):
 289        """Limit the amount of work when a Query is deepcopied."""
 290        result = self.clone()
 291        memo[id(self)] = result
 292        return result
 293
 294    def get_compiler(self, *, elide_empty=True):
 295        return db_connection.ops.compiler(self.compiler)(
 296            self, db_connection, elide_empty
 297        )
 298
 299    def get_meta(self):
 300        """
 301        Return the Options instance (the model._meta) from which to start
 302        processing. Normally, this is self.model._meta, but it can be changed
 303        by subclasses.
 304        """
 305        if self.model:
 306            return self.model._meta
 307
    def clone(self):
        """
        Return a copy of the current Query. A lightweight alternative to
        deepcopy().

        Shallow-copies the instance dict and then selectively deep/shallow
        copies only the mutable attributes that must not be shared between
        the original and the clone.
        """
        # Bypass __init__ (and its attribute setup) by creating an Empty
        # instance and re-pointing its class.
        obj = Empty()
        obj.__class__ = self.__class__
        # Copy references to everything.
        obj.__dict__ = self.__dict__.copy()
        # Clone attributes that can't use shallow copy.
        obj.alias_refcount = self.alias_refcount.copy()
        obj.alias_map = self.alias_map.copy()
        obj.external_aliases = self.external_aliases.copy()
        obj.table_map = self.table_map.copy()
        obj.where = self.where.clone()
        obj.annotations = self.annotations.copy()
        if self.annotation_select_mask is not None:
            obj.annotation_select_mask = self.annotation_select_mask.copy()
        if self.combined_queries:
            obj.combined_queries = tuple(
                [query.clone() for query in self.combined_queries]
            )
        # _annotation_select_cache cannot be copied, as doing so breaks the
        # (necessary) state in which both annotations and
        # _annotation_select_cache point to the same underlying objects.
        # It will get re-populated in the cloned queryset the next time it's
        # used.
        obj._annotation_select_cache = None
        obj.extra = self.extra.copy()
        if self.extra_select_mask is not None:
            obj.extra_select_mask = self.extra_select_mask.copy()
        if self._extra_select_cache is not None:
            obj._extra_select_cache = self._extra_select_cache.copy()
        if self.select_related is not False:
            # Use deepcopy because select_related stores fields in nested
            # dicts.
            obj.select_related = copy.deepcopy(obj.select_related)
        if "subq_aliases" in self.__dict__:
            obj.subq_aliases = self.subq_aliases.copy()
        obj.used_aliases = self.used_aliases.copy()
        obj._filtered_relations = self._filtered_relations.copy()
        # Clear the cached_property, if it exists.
        obj.__dict__.pop("base_table", None)
        return obj
 352
 353    def chain(self, klass=None):
 354        """
 355        Return a copy of the current Query that's ready for another operation.
 356        The klass argument changes the type of the Query, e.g. UpdateQuery.
 357        """
 358        obj = self.clone()
 359        if klass and obj.__class__ != klass:
 360            obj.__class__ = klass
 361        if not obj.filter_is_sticky:
 362            obj.used_aliases = set()
 363        obj.filter_is_sticky = False
 364        if hasattr(obj, "_setup_query"):
 365            obj._setup_query()
 366        return obj
 367
 368    def relabeled_clone(self, change_map):
 369        clone = self.clone()
 370        clone.change_aliases(change_map)
 371        return clone
 372
 373    def _get_col(self, target, field, alias):
 374        if not self.alias_cols:
 375            alias = None
 376        return target.get_col(alias, field)
 377
    def get_aggregation(self, aggregate_exprs):
        """
        Return the dictionary with the values of the existing aggregations.

        Resolves each expression in *aggregate_exprs*, decides whether the
        aggregation must run over a subquery (to preserve limits, distinct,
        grouping, or set operations), executes the query, and returns a dict
        mapping each requested alias to its computed value.
        """
        if not aggregate_exprs:
            return {}
        aggregates = {}
        for alias, aggregate_expr in aggregate_exprs.items():
            self.check_alias(alias)
            aggregate = aggregate_expr.resolve_expression(
                self, allow_joins=True, reuse=None, summarize=True
            )
            if not aggregate.contains_aggregate:
                raise TypeError(f"{alias} is not an aggregate expression")
            aggregates[alias] = aggregate
        # Existing usage of aggregation can be determined by the presence of
        # selected aggregates but also by filters against aliased aggregates.
        _, having, qualify = self.where.split_having_qualify()
        has_existing_aggregation = (
            any(
                getattr(annotation, "contains_aggregate", True)
                for annotation in self.annotations.values()
            )
            or having
        )
        # Decide if we need to use a subquery.
        #
        # Existing aggregations would cause incorrect results as
        # get_aggregation() must produce just one result and thus must not use
        # GROUP BY.
        #
        # If the query has limit or distinct, or uses set operations, then
        # those operations must be done in a subquery so that the query
        # aggregates on the limit and/or distinct results instead of applying
        # the distinct and limit after the aggregation.
        if (
            isinstance(self.group_by, tuple)
            or self.is_sliced
            or has_existing_aggregation
            or qualify
            or self.distinct
            or self.combinator
        ):
            from plain.models.sql.subqueries import AggregateQuery

            inner_query = self.clone()
            inner_query.subquery = True
            outer_query = AggregateQuery(self.model, inner_query)
            inner_query.select_for_update = False
            inner_query.select_related = False
            inner_query.set_annotation_mask(self.annotation_select)
            # Queries with distinct_fields need ordering and when a limit is
            # applied we must take the slice from the ordered query. Otherwise
            # no need for ordering.
            inner_query.clear_ordering(force=False)
            if not inner_query.distinct:
                # If the inner query uses default select and it has some
                # aggregate annotations, then we must make sure the inner
                # query is grouped by the main model's primary key. However,
                # clearing the select clause can alter results if distinct is
                # used.
                if inner_query.default_cols and has_existing_aggregation:
                    inner_query.group_by = (
                        self.model._meta.pk.get_col(inner_query.get_initial_alias()),
                    )
                inner_query.default_cols = False
                if not qualify:
                    # Mask existing annotations that are not referenced by
                    # aggregates to be pushed to the outer query unless
                    # filtering against window functions is involved as it
                    # requires complex realising.
                    annotation_mask = set()
                    for aggregate in aggregates.values():
                        annotation_mask |= aggregate.get_refs()
                    inner_query.set_annotation_mask(annotation_mask)

            # Add aggregates to the outer AggregateQuery. This requires making
            # sure all columns referenced by the aggregates are selected in the
            # inner query. It is achieved by retrieving all column references
            # by the aggregates, explicitly selecting them in the inner query,
            # and making sure the aggregates are repointed to them.
            col_refs = {}
            for alias, aggregate in aggregates.items():
                replacements = {}
                for col in self._gen_cols([aggregate], resolve_refs=False):
                    if not (col_ref := col_refs.get(col)):
                        index = len(col_refs) + 1
                        col_alias = f"__col{index}"
                        col_ref = Ref(col_alias, col)
                        col_refs[col] = col_ref
                        inner_query.annotations[col_alias] = col
                        inner_query.append_annotation_mask([col_alias])
                    replacements[col] = col_ref
                outer_query.annotations[alias] = aggregate.replace_expressions(
                    replacements
                )
            if (
                inner_query.select == ()
                and not inner_query.default_cols
                and not inner_query.annotation_select_mask
            ):
                # In case of Model.objects[0:3].count(), there would be no
                # field selected in the inner query, yet we must use a subquery.
                # So, make sure at least one field is selected.
                inner_query.select = (
                    self.model._meta.pk.get_col(inner_query.get_initial_alias()),
                )
        else:
            # No subquery needed: aggregate directly over this query.
            outer_query = self
            self.select = ()
            self.default_cols = False
            self.extra = {}
            if self.annotations:
                # Inline reference to existing annotations and mask them as
                # they are unnecessary given only the summarized aggregations
                # are requested.
                replacements = {
                    Ref(alias, annotation): annotation
                    for alias, annotation in self.annotations.items()
                }
                self.annotations = {
                    alias: aggregate.replace_expressions(replacements)
                    for alias, aggregate in aggregates.items()
                }
            else:
                self.annotations = aggregates
            self.set_annotation_mask(aggregates)

        # Compute per-expression fallbacks for an empty result set; if any
        # expression has no fallback (NotImplemented), the compiler must not
        # elide the empty query.
        empty_set_result = [
            expression.empty_result_set_value
            for expression in outer_query.annotation_select.values()
        ]
        elide_empty = not any(result is NotImplemented for result in empty_set_result)
        outer_query.clear_ordering(force=True)
        outer_query.clear_limits()
        outer_query.select_for_update = False
        outer_query.select_related = False
        compiler = outer_query.get_compiler(elide_empty=elide_empty)
        result = compiler.execute_sql(SINGLE)
        if result is None:
            result = empty_set_result
        else:
            converters = compiler.get_converters(outer_query.annotation_select.values())
            result = next(compiler.apply_converters((result,), converters))

        return dict(zip(outer_query.annotation_select, result))
 524
 525    def get_count(self):
 526        """
 527        Perform a COUNT() query using the current filter constraints.
 528        """
 529        obj = self.clone()
 530        return obj.get_aggregation({"__count": Count("*")})["__count"]
 531
    def has_filters(self):
        # Truthy when any WHERE condition has been added to the query.
        return self.where
 534
 535    def exists(self, limit=True):
 536        q = self.clone()
 537        if not (q.distinct and q.is_sliced):
 538            if q.group_by is True:
 539                q.add_fields(
 540                    (f.attname for f in self.model._meta.concrete_fields), False
 541                )
 542                # Disable GROUP BY aliases to avoid orphaning references to the
 543                # SELECT clause which is about to be cleared.
 544                q.set_group_by(allow_aliases=False)
 545            q.clear_select_clause()
 546        if q.combined_queries and q.combinator == "union":
 547            q.combined_queries = tuple(
 548                combined_query.exists(limit=False)
 549                for combined_query in q.combined_queries
 550            )
 551        q.clear_ordering(force=True)
 552        if limit:
 553            q.set_limits(high=1)
 554        q.add_annotation(Value(1), "a")
 555        return q
 556
 557    def has_results(self):
 558        q = self.exists()
 559        compiler = q.get_compiler()
 560        return compiler.has_results()
 561
 562    def explain(self, format=None, **options):
 563        q = self.clone()
 564        for option_name in options:
 565            if (
 566                not EXPLAIN_OPTIONS_PATTERN.fullmatch(option_name)
 567                or "--" in option_name
 568            ):
 569                raise ValueError(f"Invalid option name: {option_name!r}.")
 570        q.explain_info = ExplainInfo(format, options)
 571        compiler = q.get_compiler()
 572        return "\n".join(compiler.explain_query())
 573
    def combine(self, rhs, connector):
        """
        Merge the 'rhs' query into the current one (with any 'rhs' effects
        being applied *after* (that is, "to the right of") anything in the
        current query. 'rhs' is not modified during a call to this function.

        The 'connector' parameter describes how to connect filters from the
        'rhs' query.

        Raises TypeError when the two queries cannot be merged (different
        models, sliced lhs, or mismatched distinct settings).
        """
        if self.model != rhs.model:
            raise TypeError("Cannot combine queries on two different base models.")
        if self.is_sliced:
            raise TypeError("Cannot combine queries once a slice has been taken.")
        if self.distinct != rhs.distinct:
            raise TypeError("Cannot combine a unique query with a non-unique query.")
        if self.distinct_fields != rhs.distinct_fields:
            raise TypeError("Cannot combine queries with different distinct fields.")

        # If lhs and rhs shares the same alias prefix, it is possible to have
        # conflicting alias changes like T4 -> T5, T5 -> T6, which might end up
        # as T4 -> T6 while combining two querysets. To prevent this, change an
        # alias prefix of the rhs and update current aliases accordingly,
        # except if the alias is the base table since it must be present in the
        # query on both sides.
        initial_alias = self.get_initial_alias()
        rhs.bump_prefix(self, exclude={initial_alias})

        # Work out how to relabel the rhs aliases, if necessary.
        change_map = {}
        conjunction = connector == AND

        # Determine which existing joins can be reused. When combining the
        # query with AND we must recreate all joins for m2m filters. When
        # combining with OR we can reuse joins. The reason is that in AND
        # case a single row can't fulfill a condition like:
        #     revrel__col=1 & revrel__col=2
        # But, there might be two different related rows matching this
        # condition. In OR case a single True is enough, so single row is
        # enough, too.
        #
        # Note that we will be creating duplicate joins for non-m2m joins in
        # the AND case. The results will be correct but this creates too many
        # joins. This is something that could be fixed later on.
        reuse = set() if conjunction else set(self.alias_map)
        joinpromoter = JoinPromoter(connector, 2, False)
        joinpromoter.add_votes(
            j for j in self.alias_map if self.alias_map[j].join_type == INNER
        )
        rhs_votes = set()
        # Now, add the joins from rhs query into the new query (skipping base
        # table).
        rhs_tables = list(rhs.alias_map)[1:]
        for alias in rhs_tables:
            join = rhs.alias_map[alias]
            # If the left side of the join was already relabeled, use the
            # updated alias.
            join = join.relabeled_clone(change_map)
            new_alias = self.join(join, reuse=reuse)
            if join.join_type == INNER:
                rhs_votes.add(new_alias)
            # We can't reuse the same join again in the query. If we have two
            # distinct joins for the same connection in rhs query, then the
            # combined query must have two joins, too.
            reuse.discard(new_alias)
            if alias != new_alias:
                change_map[alias] = new_alias
            if not rhs.alias_refcount[alias]:
                # The alias was unused in the rhs query. Unref it so that it
                # will be unused in the new query, too. We have to add and
                # unref the alias so that join promotion has information of
                # the join type for the unused alias.
                self.unref_alias(new_alias)
        joinpromoter.add_votes(rhs_votes)
        joinpromoter.update_join_types(self)

        # Combine subqueries aliases to ensure aliases relabelling properly
        # handle subqueries when combining where and select clauses.
        self.subq_aliases |= rhs.subq_aliases

        # Now relabel a copy of the rhs where-clause and add it to the current
        # one.
        w = rhs.where.clone()
        w.relabel_aliases(change_map)
        self.where.add(w, connector)

        # Selection columns and extra extensions are those provided by 'rhs'.
        if rhs.select:
            self.set_select([col.relabeled_clone(change_map) for col in rhs.select])
        else:
            self.select = ()

        if connector == OR:
            # It would be nice to be able to handle this, but the queries don't
            # really make sense (or return consistent value sets). Not worth
            # the extra complexity when you can write a real query instead.
            if self.extra and rhs.extra:
                raise ValueError(
                    "When merging querysets using 'or', you cannot have "
                    "extra(select=...) on both sides."
                )
        self.extra.update(rhs.extra)
        extra_select_mask = set()
        if self.extra_select_mask is not None:
            extra_select_mask.update(self.extra_select_mask)
        if rhs.extra_select_mask is not None:
            extra_select_mask.update(rhs.extra_select_mask)
        if extra_select_mask:
            self.set_extra_mask(extra_select_mask)
        self.extra_tables += rhs.extra_tables

        # Ordering uses the 'rhs' ordering, unless it has none, in which case
        # the current ordering is used.
        self.order_by = rhs.order_by or self.order_by
        self.extra_order_by = rhs.extra_order_by or self.extra_order_by
 688
    def _get_defer_select_mask(self, opts, mask, select_mask=None):
        """
        Build the select mask for defer()-style loading.

        *mask* maps deferred field names to nested masks; it is consumed
        (via pop) as concrete fields are processed. Returns a dict mapping
        field objects (or (name, relation) pairs for filtered relations)
        to their nested select masks.
        """
        if select_mask is None:
            select_mask = {}
        # The primary key is always loaded.
        select_mask[opts.pk] = {}
        # All concrete fields that are not part of the defer mask must be
        # loaded. If a relational field is encountered it gets added to the
        # mask for it be considered if `select_related` and the cycle continues
        # by recursively calling this function.
        for field in opts.concrete_fields:
            field_mask = mask.pop(field.name, None)
            if field_mask is None:
                select_mask.setdefault(field, {})
            elif field_mask:
                if not field.is_relation:
                    raise FieldError(next(iter(field_mask)))
                field_select_mask = select_mask.setdefault(field, {})
                related_model = field.remote_field.model._meta.concrete_model
                self._get_defer_select_mask(
                    related_model._meta, field_mask, field_select_mask
                )
        # Remaining defer entries must be references to reverse relationships.
        # The following code is expected to raise FieldError if it encounters
        # a malformed defer entry.
        for field_name, field_mask in mask.items():
            if filtered_relation := self._filtered_relations.get(field_name):
                relation = opts.get_field(filtered_relation.relation_name)
                field_select_mask = select_mask.setdefault((field_name, relation), {})
                field = relation.field
            else:
                field = opts.get_field(field_name).field
                field_select_mask = select_mask.setdefault(field, {})
            related_model = field.model._meta.concrete_model
            self._get_defer_select_mask(
                related_model._meta, field_mask, field_select_mask
            )
        return select_mask
 725
 726    def _get_only_select_mask(self, opts, mask, select_mask=None):
 727        if select_mask is None:
 728            select_mask = {}
 729        select_mask[opts.pk] = {}
 730        # Only include fields mentioned in the mask.
 731        for field_name, field_mask in mask.items():
 732            field = opts.get_field(field_name)
 733            field_select_mask = select_mask.setdefault(field, {})
 734            if field_mask:
 735                if not field.is_relation:
 736                    raise FieldError(next(iter(field_mask)))
 737                related_model = field.remote_field.model._meta.concrete_model
 738                self._get_only_select_mask(
 739                    related_model._meta, field_mask, field_select_mask
 740                )
 741        return select_mask
 742
 743    def get_select_mask(self):
 744        """
 745        Convert the self.deferred_loading data structure to an alternate data
 746        structure, describing the field that *will* be loaded. This is used to
 747        compute the columns to select from the database and also by the
 748        QuerySet class to work out which fields are being initialized on each
 749        model. Models that have all their fields included aren't mentioned in
 750        the result, only those that have field restrictions in place.
 751        """
 752        field_names, defer = self.deferred_loading
 753        if not field_names:
 754            return {}
 755        mask = {}
 756        for field_name in field_names:
 757            part_mask = mask
 758            for part in field_name.split(LOOKUP_SEP):
 759                part_mask = part_mask.setdefault(part, {})
 760        opts = self.get_meta()
 761        if defer:
 762            return self._get_defer_select_mask(opts, mask)
 763        return self._get_only_select_mask(opts, mask)
 764
 765    def table_alias(self, table_name, create=False, filtered_relation=None):
 766        """
 767        Return a table alias for the given table_name and whether this is a
 768        new alias or not.
 769
 770        If 'create' is true, a new alias is always created. Otherwise, the
 771        most recently created alias for the table (if one exists) is reused.
 772        """
 773        alias_list = self.table_map.get(table_name)
 774        if not create and alias_list:
 775            alias = alias_list[0]
 776            self.alias_refcount[alias] += 1
 777            return alias, False
 778
 779        # Create a new alias for this table.
 780        if alias_list:
 781            alias = "%s%d" % (self.alias_prefix, len(self.alias_map) + 1)  # noqa: UP031
 782            alias_list.append(alias)
 783        else:
 784            # The first occurrence of a table uses the table name directly.
 785            alias = (
 786                filtered_relation.alias if filtered_relation is not None else table_name
 787            )
 788            self.table_map[table_name] = [alias]
 789        self.alias_refcount[alias] = 1
 790        return alias, True
 791
 792    def ref_alias(self, alias):
 793        """Increases the reference count for this alias."""
 794        self.alias_refcount[alias] += 1
 795
 796    def unref_alias(self, alias, amount=1):
 797        """Decreases the reference count for this alias."""
 798        self.alias_refcount[alias] -= amount
 799
 800    def promote_joins(self, aliases):
 801        """
 802        Promote recursively the join type of given aliases and its children to
 803        an outer join. If 'unconditional' is False, only promote the join if
 804        it is nullable or the parent join is an outer join.
 805
 806        The children promotion is done to avoid join chains that contain a LOUTER
 807        b INNER c. So, if we have currently a INNER b INNER c and a->b is promoted,
 808        then we must also promote b->c automatically, or otherwise the promotion
 809        of a->b doesn't actually change anything in the query results.
 810        """
 811        aliases = list(aliases)
 812        while aliases:
 813            alias = aliases.pop(0)
 814            if self.alias_map[alias].join_type is None:
 815                # This is the base table (first FROM entry) - this table
 816                # isn't really joined at all in the query, so we should not
 817                # alter its join type.
 818                continue
 819            # Only the first alias (skipped above) should have None join_type
 820            assert self.alias_map[alias].join_type is not None
 821            parent_alias = self.alias_map[alias].parent_alias
 822            parent_louter = (
 823                parent_alias and self.alias_map[parent_alias].join_type == LOUTER
 824            )
 825            already_louter = self.alias_map[alias].join_type == LOUTER
 826            if (self.alias_map[alias].nullable or parent_louter) and not already_louter:
 827                self.alias_map[alias] = self.alias_map[alias].promote()
 828                # Join type of 'alias' changed, so re-examine all aliases that
 829                # refer to this one.
 830                aliases.extend(
 831                    join
 832                    for join in self.alias_map
 833                    if self.alias_map[join].parent_alias == alias
 834                    and join not in aliases
 835                )
 836
 837    def demote_joins(self, aliases):
 838        """
 839        Change join type from LOUTER to INNER for all joins in aliases.
 840
 841        Similarly to promote_joins(), this method must ensure no join chains
 842        containing first an outer, then an inner join are generated. If we
 843        are demoting b->c join in chain a LOUTER b LOUTER c then we must
 844        demote a->b automatically, or otherwise the demotion of b->c doesn't
 845        actually change anything in the query results. .
 846        """
 847        aliases = list(aliases)
 848        while aliases:
 849            alias = aliases.pop(0)
 850            if self.alias_map[alias].join_type == LOUTER:
 851                self.alias_map[alias] = self.alias_map[alias].demote()
 852                parent_alias = self.alias_map[alias].parent_alias
 853                if self.alias_map[parent_alias].join_type == INNER:
 854                    aliases.append(parent_alias)
 855
 856    def reset_refcounts(self, to_counts):
 857        """
 858        Reset reference counts for aliases so that they match the value passed
 859        in `to_counts`.
 860        """
 861        for alias, cur_refcount in self.alias_refcount.copy().items():
 862            unref_amount = cur_refcount - to_counts.get(alias, 0)
 863            self.unref_alias(alias, unref_amount)
 864
 865    def change_aliases(self, change_map):
 866        """
 867        Change the aliases in change_map (which maps old-alias -> new-alias),
 868        relabelling any references to them in select columns and the where
 869        clause.
 870        """
 871        # If keys and values of change_map were to intersect, an alias might be
 872        # updated twice (e.g. T4 -> T5, T5 -> T6, so also T4 -> T6) depending
 873        # on their order in change_map.
 874        assert set(change_map).isdisjoint(change_map.values())
 875
 876        # 1. Update references in "select" (normal columns plus aliases),
 877        # "group by" and "where".
 878        self.where.relabel_aliases(change_map)
 879        if isinstance(self.group_by, tuple):
 880            self.group_by = tuple(
 881                [col.relabeled_clone(change_map) for col in self.group_by]
 882            )
 883        self.select = tuple([col.relabeled_clone(change_map) for col in self.select])
 884        self.annotations = self.annotations and {
 885            key: col.relabeled_clone(change_map)
 886            for key, col in self.annotations.items()
 887        }
 888
 889        # 2. Rename the alias in the internal table/alias datastructures.
 890        for old_alias, new_alias in change_map.items():
 891            if old_alias not in self.alias_map:
 892                continue
 893            alias_data = self.alias_map[old_alias].relabeled_clone(change_map)
 894            self.alias_map[new_alias] = alias_data
 895            self.alias_refcount[new_alias] = self.alias_refcount[old_alias]
 896            del self.alias_refcount[old_alias]
 897            del self.alias_map[old_alias]
 898
 899            table_aliases = self.table_map[alias_data.table_name]
 900            for pos, alias in enumerate(table_aliases):
 901                if alias == old_alias:
 902                    table_aliases[pos] = new_alias
 903                    break
 904        self.external_aliases = {
 905            # Table is aliased or it's being changed and thus is aliased.
 906            change_map.get(alias, alias): (aliased or alias in change_map)
 907            for alias, aliased in self.external_aliases.items()
 908        }
 909
 910    def bump_prefix(self, other_query, exclude=None):
 911        """
 912        Change the alias prefix to the next letter in the alphabet in a way
 913        that the other query's aliases and this query's aliases will not
 914        conflict. Even tables that previously had no alias will get an alias
 915        after this call. To prevent changing aliases use the exclude parameter.
 916        """
 917
 918        def prefix_gen():
 919            """
 920            Generate a sequence of characters in alphabetical order:
 921                -> 'A', 'B', 'C', ...
 922
 923            When the alphabet is finished, the sequence will continue with the
 924            Cartesian product:
 925                -> 'AA', 'AB', 'AC', ...
 926            """
 927            alphabet = ascii_uppercase
 928            prefix = chr(ord(self.alias_prefix) + 1)
 929            yield prefix
 930            for n in count(1):
 931                seq = alphabet[alphabet.index(prefix) :] if prefix else alphabet
 932                for s in product(seq, repeat=n):
 933                    yield "".join(s)
 934                prefix = None
 935
 936        if self.alias_prefix != other_query.alias_prefix:
 937            # No clashes between self and outer query should be possible.
 938            return
 939
 940        # Explicitly avoid infinite loop. The constant divider is based on how
 941        # much depth recursive subquery references add to the stack. This value
 942        # might need to be adjusted when adding or removing function calls from
 943        # the code path in charge of performing these operations.
 944        local_recursion_limit = sys.getrecursionlimit() // 16
 945        for pos, prefix in enumerate(prefix_gen()):
 946            if prefix not in self.subq_aliases:
 947                self.alias_prefix = prefix
 948                break
 949            if pos > local_recursion_limit:
 950                raise RecursionError(
 951                    "Maximum recursion depth exceeded: too many subqueries."
 952                )
 953        self.subq_aliases = self.subq_aliases.union([self.alias_prefix])
 954        other_query.subq_aliases = other_query.subq_aliases.union(self.subq_aliases)
 955        if exclude is None:
 956            exclude = {}
 957        self.change_aliases(
 958            {
 959                alias: "%s%d" % (self.alias_prefix, pos)  # noqa: UP031
 960                for pos, alias in enumerate(self.alias_map)
 961                if alias not in exclude
 962            }
 963        )
 964
 965    def get_initial_alias(self):
 966        """
 967        Return the first alias for this query, after increasing its reference
 968        count.
 969        """
 970        if self.alias_map:
 971            alias = self.base_table
 972            self.ref_alias(alias)
 973        elif self.model:
 974            alias = self.join(self.base_table_class(self.get_meta().db_table, None))
 975        else:
 976            alias = None
 977        return alias
 978
 979    def count_active_tables(self):
 980        """
 981        Return the number of tables in this query with a non-zero reference
 982        count. After execution, the reference counts are zeroed, so tables
 983        added in compiler will not be seen by this method.
 984        """
 985        return len([1 for count in self.alias_refcount.values() if count])
 986
 987    def join(self, join, reuse=None, reuse_with_filtered_relation=False):
 988        """
 989        Return an alias for the 'join', either reusing an existing alias for
 990        that join or creating a new one. 'join' is either a base_table_class or
 991        join_class.
 992
 993        The 'reuse' parameter can be either None which means all joins are
 994        reusable, or it can be a set containing the aliases that can be reused.
 995
 996        The 'reuse_with_filtered_relation' parameter is used when computing
 997        FilteredRelation instances.
 998
 999        A join is always created as LOUTER if the lhs alias is LOUTER to make
1000        sure chains like t1 LOUTER t2 INNER t3 aren't generated. All new
1001        joins are created as LOUTER if the join is nullable.
1002        """
1003        if reuse_with_filtered_relation and reuse:
1004            reuse_aliases = [
1005                a for a, j in self.alias_map.items() if a in reuse and j.equals(join)
1006            ]
1007        else:
1008            reuse_aliases = [
1009                a
1010                for a, j in self.alias_map.items()
1011                if (reuse is None or a in reuse) and j == join
1012            ]
1013        if reuse_aliases:
1014            if join.table_alias in reuse_aliases:
1015                reuse_alias = join.table_alias
1016            else:
1017                # Reuse the most recent alias of the joined table
1018                # (a many-to-many relation may be joined multiple times).
1019                reuse_alias = reuse_aliases[-1]
1020            self.ref_alias(reuse_alias)
1021            return reuse_alias
1022
1023        # No reuse is possible, so we need a new alias.
1024        alias, _ = self.table_alias(
1025            join.table_name, create=True, filtered_relation=join.filtered_relation
1026        )
1027        if join.join_type:
1028            if self.alias_map[join.parent_alias].join_type == LOUTER or join.nullable:
1029                join_type = LOUTER
1030            else:
1031                join_type = INNER
1032            join.join_type = join_type
1033        join.table_alias = alias
1034        self.alias_map[alias] = join
1035        return alias
1036
1037    def check_alias(self, alias):
1038        if FORBIDDEN_ALIAS_PATTERN.search(alias):
1039            raise ValueError(
1040                "Column aliases cannot contain whitespace characters, quotation marks, "
1041                "semicolons, or SQL comments."
1042            )
1043
1044    def add_annotation(self, annotation, alias, select=True):
1045        """Add a single annotation expression to the Query."""
1046        self.check_alias(alias)
1047        annotation = annotation.resolve_expression(self, allow_joins=True, reuse=None)
1048        if select:
1049            self.append_annotation_mask([alias])
1050        else:
1051            self.set_annotation_mask(set(self.annotation_select).difference({alias}))
1052        self.annotations[alias] = annotation
1053
1054    def resolve_expression(self, query, *args, **kwargs):
1055        clone = self.clone()
1056        # Subqueries need to use a different set of aliases than the outer query.
1057        clone.bump_prefix(query)
1058        clone.subquery = True
1059        clone.where.resolve_expression(query, *args, **kwargs)
1060        # Resolve combined queries.
1061        if clone.combinator:
1062            clone.combined_queries = tuple(
1063                [
1064                    combined_query.resolve_expression(query, *args, **kwargs)
1065                    for combined_query in clone.combined_queries
1066                ]
1067            )
1068        for key, value in clone.annotations.items():
1069            resolved = value.resolve_expression(query, *args, **kwargs)
1070            if hasattr(resolved, "external_aliases"):
1071                resolved.external_aliases.update(clone.external_aliases)
1072            clone.annotations[key] = resolved
1073        # Outer query's aliases are considered external.
1074        for alias, table in query.alias_map.items():
1075            clone.external_aliases[alias] = (
1076                isinstance(table, Join)
1077                and table.join_field.related_model._meta.db_table != alias
1078            ) or (
1079                isinstance(table, BaseTable) and table.table_name != table.table_alias
1080            )
1081        return clone
1082
1083    def get_external_cols(self):
1084        exprs = chain(self.annotations.values(), self.where.children)
1085        return [
1086            col
1087            for col in self._gen_cols(exprs, include_external=True)
1088            if col.alias in self.external_aliases
1089        ]
1090
1091    def get_group_by_cols(self, wrapper=None):
1092        # If wrapper is referenced by an alias for an explicit GROUP BY through
1093        # values() a reference to this expression and not the self must be
1094        # returned to ensure external column references are not grouped against
1095        # as well.
1096        external_cols = self.get_external_cols()
1097        if any(col.possibly_multivalued for col in external_cols):
1098            return [wrapper or self]
1099        return external_cols
1100
1101    def as_sql(self, compiler, connection):
1102        # Some backends (e.g. Oracle) raise an error when a subquery contains
1103        # unnecessary ORDER BY clause.
1104        if (
1105            self.subquery
1106            and not db_connection.features.ignores_unnecessary_order_by_in_subqueries
1107        ):
1108            self.clear_ordering(force=False)
1109            for query in self.combined_queries:
1110                query.clear_ordering(force=False)
1111        sql, params = self.get_compiler().as_sql()
1112        if self.subquery:
1113            sql = f"({sql})"
1114        return sql, params
1115
1116    def resolve_lookup_value(self, value, can_reuse, allow_joins):
1117        if hasattr(value, "resolve_expression"):
1118            value = value.resolve_expression(
1119                self,
1120                reuse=can_reuse,
1121                allow_joins=allow_joins,
1122            )
1123        elif isinstance(value, list | tuple):
1124            # The items of the iterable may be expressions and therefore need
1125            # to be resolved independently.
1126            values = (
1127                self.resolve_lookup_value(sub_value, can_reuse, allow_joins)
1128                for sub_value in value
1129            )
1130            type_ = type(value)
1131            if hasattr(type_, "_make"):  # namedtuple
1132                return type_(*values)
1133            return type_(values)
1134        return value
1135
1136    def solve_lookup_type(self, lookup, summarize=False):
1137        """
1138        Solve the lookup type from the lookup (e.g.: 'foobar__id__icontains').
1139        """
1140        lookup_splitted = lookup.split(LOOKUP_SEP)
1141        if self.annotations:
1142            annotation, expression_lookups = refs_expression(
1143                lookup_splitted, self.annotations
1144            )
1145            if annotation:
1146                expression = self.annotations[annotation]
1147                if summarize:
1148                    expression = Ref(annotation, expression)
1149                return expression_lookups, (), expression
1150        _, field, _, lookup_parts = self.names_to_path(lookup_splitted, self.get_meta())
1151        field_parts = lookup_splitted[0 : len(lookup_splitted) - len(lookup_parts)]
1152        if len(lookup_parts) > 1 and not field_parts:
1153            raise FieldError(
1154                f'Invalid lookup "{lookup}" for model {self.get_meta().model.__name__}".'
1155            )
1156        return lookup_parts, field_parts, False
1157
1158    def check_query_object_type(self, value, opts, field):
1159        """
1160        Check whether the object passed while querying is of the correct type.
1161        If not, raise a ValueError specifying the wrong object.
1162        """
1163        if hasattr(value, "_meta"):
1164            if not check_rel_lookup_compatibility(value._meta.model, opts, field):
1165                raise ValueError(
1166                    f'Cannot query "{value}": Must be "{opts.object_name}" instance.'
1167                )
1168
1169    def check_related_objects(self, field, value, opts):
1170        """Check the type of object passed to query relations."""
1171        if field.is_relation:
1172            # Check that the field and the queryset use the same model in a
1173            # query like .filter(author=Author.objects.all()). For example, the
1174            # opts would be Author's (from the author field) and value.model
1175            # would be Author.objects.all() queryset's .model (Author also).
1176            # The field is the related field on the lhs side.
1177            if (
1178                isinstance(value, Query)
1179                and not value.has_select_fields
1180                and not check_rel_lookup_compatibility(value.model, opts, field)
1181            ):
1182                raise ValueError(
1183                    f'Cannot use QuerySet for "{value.model._meta.object_name}": Use a QuerySet for "{opts.object_name}".'
1184                )
1185            elif hasattr(value, "_meta"):
1186                self.check_query_object_type(value, opts, field)
1187            elif hasattr(value, "__iter__"):
1188                for v in value:
1189                    self.check_query_object_type(v, opts, field)
1190
1191    def check_filterable(self, expression):
1192        """Raise an error if expression cannot be used in a WHERE clause."""
1193        if hasattr(expression, "resolve_expression") and not getattr(
1194            expression, "filterable", True
1195        ):
1196            raise NotSupportedError(
1197                expression.__class__.__name__ + " is disallowed in the filter clause."
1198            )
1199        if hasattr(expression, "get_source_expressions"):
1200            for expr in expression.get_source_expressions():
1201                self.check_filterable(expr)
1202
1203    def build_lookup(self, lookups, lhs, rhs):
1204        """
1205        Try to extract transforms and lookup from given lhs.
1206
1207        The lhs value is something that works like SQLExpression.
1208        The rhs value is what the lookup is going to compare against.
1209        The lookups is a list of names to extract using get_lookup()
1210        and get_transform().
1211        """
1212        # __exact is the default lookup if one isn't given.
1213        *transforms, lookup_name = lookups or ["exact"]
1214        for name in transforms:
1215            lhs = self.try_transform(lhs, name)
1216        # First try get_lookup() so that the lookup takes precedence if the lhs
1217        # supports both transform and lookup for the name.
1218        lookup_class = lhs.get_lookup(lookup_name)
1219        if not lookup_class:
1220            # A lookup wasn't found. Try to interpret the name as a transform
1221            # and do an Exact lookup against it.
1222            lhs = self.try_transform(lhs, lookup_name)
1223            lookup_name = "exact"
1224            lookup_class = lhs.get_lookup(lookup_name)
1225            if not lookup_class:
1226                return
1227
1228        lookup = lookup_class(lhs, rhs)
1229        # Interpret '__exact=None' as the sql 'is NULL'; otherwise, reject all
1230        # uses of None as a query value unless the lookup supports it.
1231        if lookup.rhs is None and not lookup.can_use_none_as_rhs:
1232            if lookup_name not in ("exact", "iexact"):
1233                raise ValueError("Cannot use None as a query value")
1234            return lhs.get_lookup("isnull")(lhs, True)
1235
1236        return lookup
1237
1238    def try_transform(self, lhs, name):
1239        """
1240        Helper method for build_lookup(). Try to fetch and initialize
1241        a transform for name parameter from lhs.
1242        """
1243        transform_class = lhs.get_transform(name)
1244        if transform_class:
1245            return transform_class(lhs)
1246        else:
1247            output_field = lhs.output_field.__class__
1248            suggested_lookups = difflib.get_close_matches(
1249                name, output_field.get_lookups()
1250            )
1251            if suggested_lookups:
1252                suggestion = ", perhaps you meant {}?".format(
1253                    " or ".join(suggested_lookups)
1254                )
1255            else:
1256                suggestion = "."
1257            raise FieldError(
1258                f"Unsupported lookup '{name}' for {output_field.__name__} or join on the field not "
1259                f"permitted{suggestion}"
1260            )
1261
    def build_filter(
        self,
        filter_expr,
        branch_negated=False,
        current_negated=False,
        can_reuse=None,
        allow_joins=True,
        split_subq=True,
        reuse_with_filtered_relation=False,
        check_filterable=True,
        summarize=False,
    ):
        """
        Build a WhereNode for a single filter clause but don't add it
        to this Query. Query.add_q() will then add this filter to the where
        Node.

        The 'branch_negated' tells us if the current branch contains any
        negations. This will be used to determine if subqueries are needed.

        The 'current_negated' is used to determine if the current filter is
        negated or not and this will be used to determine if IS NULL filtering
        is needed.

        The difference between current_negated and branch_negated is that
        branch_negated is set on first negation, but current_negated is
        flipped for each negation.

        Note that add_filter will not do any negating itself, that is done
        upper in the code by add_q().

        The 'can_reuse' is a set of reusable joins for multijoins.

        If 'reuse_with_filtered_relation' is True, then only joins in can_reuse
        will be reused.

        The method will create a filter clause that can be added to the current
        query. However, if the filter isn't added to the query then the caller
        is responsible for unreffing the joins used.

        Return a (clause, used_joins) pair: clause is a WhereNode and
        used_joins is the collection of join aliases the clause made use of
        (empty when the clause must keep its outer joins or used none).
        """
        if isinstance(filter_expr, dict):
            raise FieldError("Cannot parse keyword query as dict")
        if isinstance(filter_expr, Q):
            # Q objects are handled by the tree-walking _add_q().
            return self._add_q(
                filter_expr,
                branch_negated=branch_negated,
                current_negated=current_negated,
                used_aliases=can_reuse,
                allow_joins=allow_joins,
                split_subq=split_subq,
                check_filterable=check_filterable,
                summarize=summarize,
            )
        if hasattr(filter_expr, "resolve_expression"):
            # A bare expression used directly as a filter condition.
            if not getattr(filter_expr, "conditional", False):
                raise TypeError("Cannot filter against a non-conditional expression.")
            condition = filter_expr.resolve_expression(
                self, allow_joins=allow_joins, summarize=summarize
            )
            if not isinstance(condition, Lookup):
                # Compare the resolved expression against True.
                condition = self.build_lookup(["exact"], condition, True)
            return WhereNode([condition], connector=AND), []
        # Otherwise filter_expr is a ("lookup__path", value) pair.
        arg, value = filter_expr
        if not arg:
            raise FieldError(f"Cannot parse keyword query {arg!r}")
        lookups, parts, reffed_expression = self.solve_lookup_type(arg, summarize)

        if check_filterable:
            self.check_filterable(reffed_expression)

        if not allow_joins and len(parts) > 1:
            raise FieldError("Joined field references are not permitted in this query")

        # Detect which joins resolving the value added by diffing refcounts.
        pre_joins = self.alias_refcount.copy()
        value = self.resolve_lookup_value(value, can_reuse, allow_joins)
        used_joins = {
            k for k, v in self.alias_refcount.items() if v > pre_joins.get(k, 0)
        }

        if check_filterable:
            self.check_filterable(value)

        if reffed_expression:
            # The lookup refers to an annotation; no join setup is needed.
            condition = self.build_lookup(lookups, reffed_expression, value)
            return WhereNode([condition], connector=AND), []

        opts = self.get_meta()
        alias = self.get_initial_alias()
        allow_many = not branch_negated or not split_subq

        try:
            join_info = self.setup_joins(
                parts,
                opts,
                alias,
                can_reuse=can_reuse,
                allow_many=allow_many,
                reuse_with_filtered_relation=reuse_with_filtered_relation,
            )

            # Prevent iterator from being consumed by check_related_objects()
            if isinstance(value, Iterator):
                value = list(value)
            self.check_related_objects(join_info.final_field, value, join_info.opts)

            # split_exclude() needs to know which joins were generated for the
            # lookup parts
            self._lookup_joins = join_info.joins
        except MultiJoin as e:
            # A disallowed multi-valued join: rewrite as a subquery exclude.
            return self.split_exclude(filter_expr, can_reuse, e.names_with_path)

        # Update used_joins before trimming since they are reused to determine
        # which joins could be later promoted to INNER.
        used_joins.update(join_info.joins)
        targets, alias, join_list = self.trim_joins(
            join_info.targets, join_info.joins, join_info.path
        )
        if can_reuse is not None:
            can_reuse.update(join_list)

        if join_info.final_field.is_relation:
            if len(targets) == 1:
                col = self._get_col(targets[0], join_info.final_field, alias)
            else:
                # A relation spanning multiple target columns needs a
                # multi-column source expression.
                col = MultiColSource(
                    alias, targets, join_info.targets, join_info.final_field
                )
        else:
            col = self._get_col(targets[0], join_info.final_field, alias)

        condition = self.build_lookup(lookups, col, value)
        lookup_type = condition.lookup_name
        clause = WhereNode([condition], connector=AND)

        # When require_outer is set, the final return reports no reusable
        # joins so the clause's outer joins aren't later demoted.
        require_outer = (
            lookup_type == "isnull" and condition.rhs is True and not current_negated
        )
        if (
            current_negated
            and (lookup_type != "isnull" or condition.rhs is False)
            and condition.rhs is not None
        ):
            require_outer = True
            if lookup_type != "isnull":
                # The condition added here will be SQL like this:
                # NOT (col IS NOT NULL), where the first NOT is added in
                # upper layers of code. The reason for addition is that if col
                # is null, then col != someval will result in SQL "unknown"
                # which isn't the same as in Python. The Python None handling
                # is wanted, and it can be gotten by
                # (col IS NULL OR col != someval)
                #   <=>
                # NOT (col IS NOT NULL AND col = someval).
                if (
                    self.is_nullable(targets[0])
                    or self.alias_map[join_list[-1]].join_type == LOUTER
                ):
                    lookup_class = targets[0].get_lookup("isnull")
                    col = self._get_col(targets[0], join_info.targets[0], alias)
                    clause.add(lookup_class(col, False), AND)
                # If someval is a nullable column, someval IS NOT NULL is
                # added.
                if isinstance(value, Col) and self.is_nullable(value.target):
                    lookup_class = value.target.get_lookup("isnull")
                    clause.add(lookup_class(value, False), AND)
        return clause, used_joins if not require_outer else ()
1428
1429    def add_filter(self, filter_lhs, filter_rhs):
1430        self.add_q(Q((filter_lhs, filter_rhs)))
1431
1432    def add_q(self, q_object):
1433        """
1434        A preprocessor for the internal _add_q(). Responsible for doing final
1435        join promotion.
1436        """
1437        # For join promotion this case is doing an AND for the added q_object
1438        # and existing conditions. So, any existing inner join forces the join
1439        # type to remain inner. Existing outer joins can however be demoted.
1440        # (Consider case where rel_a is LOUTER and rel_a__col=1 is added - if
1441        # rel_a doesn't produce any rows, then the whole condition must fail.
1442        # So, demotion is OK.
1443        existing_inner = {
1444            a for a in self.alias_map if self.alias_map[a].join_type == INNER
1445        }
1446        clause, _ = self._add_q(q_object, self.used_aliases)
1447        if clause:
1448            self.where.add(clause, AND)
1449        self.demote_joins(existing_inner)
1450
1451    def build_where(self, filter_expr):
1452        return self.build_filter(filter_expr, allow_joins=False)[0]
1453
1454    def clear_where(self):
1455        self.where = WhereNode()
1456
    def _add_q(
        self,
        q_object,
        used_aliases,
        branch_negated=False,
        current_negated=False,
        allow_joins=True,
        split_subq=True,
        check_filterable=True,
        summarize=False,
    ):
        """
        Add a Q-object to the current filter.

        Return a two-tuple of the built WhereNode and the set of joins that
        must remain INNER (as reported by the JoinPromoter).
        """
        connector = q_object.connector
        # XOR: wrapping a negated Q inside another negation cancels out.
        current_negated ^= q_object.negated
        # branch_negated is sticky - once any ancestor Q is negated, every
        # descendant counts as being inside a negated branch.
        branch_negated = branch_negated or q_object.negated
        target_clause = WhereNode(connector=connector, negated=q_object.negated)
        # Collect per-child "votes" on which joins must stay INNER; the
        # promoter applies promotions/demotions once all children are seen.
        joinpromoter = JoinPromoter(
            q_object.connector, len(q_object.children), current_negated
        )
        for child in q_object.children:
            child_clause, needed_inner = self.build_filter(
                child,
                can_reuse=used_aliases,
                branch_negated=branch_negated,
                current_negated=current_negated,
                allow_joins=allow_joins,
                split_subq=split_subq,
                check_filterable=check_filterable,
                summarize=summarize,
            )
            joinpromoter.add_votes(needed_inner)
            # build_filter() may return an empty clause (e.g. split-off
            # exclude subqueries); only add non-empty ones.
            if child_clause:
                target_clause.add(child_clause, connector)
        needed_inner = joinpromoter.update_join_types(self)
        return target_clause, needed_inner
1492
1493    def build_filtered_relation_q(
1494        self, q_object, reuse, branch_negated=False, current_negated=False
1495    ):
1496        """Add a FilteredRelation object to the current filter."""
1497        connector = q_object.connector
1498        current_negated ^= q_object.negated
1499        branch_negated = branch_negated or q_object.negated
1500        target_clause = WhereNode(connector=connector, negated=q_object.negated)
1501        for child in q_object.children:
1502            if isinstance(child, Node):
1503                child_clause = self.build_filtered_relation_q(
1504                    child,
1505                    reuse=reuse,
1506                    branch_negated=branch_negated,
1507                    current_negated=current_negated,
1508                )
1509            else:
1510                child_clause, _ = self.build_filter(
1511                    child,
1512                    can_reuse=reuse,
1513                    branch_negated=branch_negated,
1514                    current_negated=current_negated,
1515                    allow_joins=True,
1516                    split_subq=False,
1517                    reuse_with_filtered_relation=True,
1518                )
1519            target_clause.add(child_clause, connector)
1520        return target_clause
1521
1522    def add_filtered_relation(self, filtered_relation, alias):
1523        filtered_relation.alias = alias
1524        lookups = dict(get_children_from_q(filtered_relation.condition))
1525        relation_lookup_parts, relation_field_parts, _ = self.solve_lookup_type(
1526            filtered_relation.relation_name
1527        )
1528        if relation_lookup_parts:
1529            raise ValueError(
1530                "FilteredRelation's relation_name cannot contain lookups "
1531                f"(got {filtered_relation.relation_name!r})."
1532            )
1533        for lookup in chain(lookups):
1534            lookup_parts, lookup_field_parts, _ = self.solve_lookup_type(lookup)
1535            shift = 2 if not lookup_parts else 1
1536            lookup_field_path = lookup_field_parts[:-shift]
1537            for idx, lookup_field_part in enumerate(lookup_field_path):
1538                if len(relation_field_parts) > idx:
1539                    if relation_field_parts[idx] != lookup_field_part:
1540                        raise ValueError(
1541                            "FilteredRelation's condition doesn't support "
1542                            f"relations outside the {filtered_relation.relation_name!r} (got {lookup!r})."
1543                        )
1544                else:
1545                    raise ValueError(
1546                        "FilteredRelation's condition doesn't support nested "
1547                        f"relations deeper than the relation_name (got {lookup!r} for "
1548                        f"{filtered_relation.relation_name!r})."
1549                    )
1550        self._filtered_relations[filtered_relation.alias] = filtered_relation
1551
    def names_to_path(self, names, opts, allow_many=True, fail_on_missing=False):
        """
        Walk the list of names and turn them into PathInfo tuples. A single
        name in 'names' can generate multiple PathInfos (m2m, for example).

        'names' is the path of names to travel, 'opts' is the model Options we
        start the name resolving from, 'allow_many' is as for setup_joins().
        If fail_on_missing is set to True, then a name that can't be resolved
        will generate a FieldError.

        Return a list of PathInfo tuples. In addition return the final field
        (the last used join field) and target (which is a field guaranteed to
        contain the same value as the final field). Finally, return those names
        that weren't found (which are likely transforms and the final lookup).
        """
        path, names_with_path = [], []
        for pos, name in enumerate(names):
            cur_names_with_path = (name, [])
            # "pk" is shorthand for the model's primary key field.
            if name == "pk":
                name = opts.pk.name

            field = None
            filtered_relation = None
            try:
                if opts is None:
                    raise FieldDoesNotExist
                field = opts.get_field(name)
            except FieldDoesNotExist:
                # Not a concrete field: it may be an annotation, or (only in
                # the first position) a filtered relation alias.
                if name in self.annotation_select:
                    field = self.annotation_select[name].output_field
                elif name in self._filtered_relations and pos == 0:
                    filtered_relation = self._filtered_relations[name]
                    if LOOKUP_SEP in filtered_relation.relation_name:
                        # The relation spans multiple hops: resolve the
                        # intermediate path first, keeping only its prefix
                        # (the final hop is handled like a regular field).
                        parts = filtered_relation.relation_name.split(LOOKUP_SEP)
                        filtered_relation_path, field, _, _ = self.names_to_path(
                            parts,
                            opts,
                            allow_many,
                            fail_on_missing,
                        )
                        path.extend(filtered_relation_path[:-1])
                    else:
                        field = opts.get_field(filtered_relation.relation_name)
            if field is not None:
                # Fields that contain one-to-many relations with a generic
                # model (like a GenericForeignKey) cannot generate reverse
                # relations and therefore cannot be used for reverse querying.
                if field.is_relation and not field.related_model:
                    raise FieldError(
                        f"Field {name!r} does not generate an automatic reverse "
                        "relation and therefore cannot be used for reverse "
                        "querying. If it is a GenericForeignKey, consider "
                        "adding a GenericRelation."
                    )
            else:
                # We didn't find the current field, so move position back
                # one step.
                pos -= 1
                if pos == -1 or fail_on_missing:
                    # Nothing was resolved at all (or strict mode is on):
                    # report every valid choice to help fix the lookup.
                    available = sorted(
                        [
                            *get_field_names_from_opts(opts),
                            *self.annotation_select,
                            *self._filtered_relations,
                        ]
                    )
                    raise FieldError(
                        "Cannot resolve keyword '{}' into field. "
                        "Choices are: {}".format(name, ", ".join(available))
                    )
                # The remaining names are returned unresolved (likely
                # transforms and/or the final lookup).
                break

            if hasattr(field, "path_infos"):
                # Relational field: it expands into one or more PathInfos.
                if filtered_relation:
                    pathinfos = field.get_path_info(filtered_relation)
                else:
                    pathinfos = field.path_infos
                if not allow_many:
                    # Reject any many-valued hop; MultiJoin carries the names
                    # traversed so far so the caller can build a subquery.
                    for inner_pos, p in enumerate(pathinfos):
                        if p.m2m:
                            cur_names_with_path[1].extend(pathinfos[0 : inner_pos + 1])
                            names_with_path.append(cur_names_with_path)
                            raise MultiJoin(pos + 1, names_with_path)
                last = pathinfos[-1]
                path.extend(pathinfos)
                final_field = last.join_field
                opts = last.to_opts
                targets = last.target_fields
                cur_names_with_path[1].extend(pathinfos)
                names_with_path.append(cur_names_with_path)
            else:
                # Local non-relational field.
                final_field = field
                targets = (field,)
                if fail_on_missing and pos + 1 != len(names):
                    # A non-relational field cannot be traversed further.
                    raise FieldError(
                        f"Cannot resolve keyword {names[pos + 1]!r} into field. Join on '{name}'"
                        " not permitted."
                    )
                break
        return path, final_field, targets, names[pos + 1 :]
1653
    def setup_joins(
        self,
        names,
        opts,
        alias,
        can_reuse=None,
        allow_many=True,
        reuse_with_filtered_relation=False,
    ):
        """
        Compute the necessary table joins for the passage through the fields
        given in 'names'. 'opts' is the Options class for the current model
        (which gives the table we are starting from), 'alias' is the alias for
        the table to start the joining from.

        The 'can_reuse' defines the reverse foreign key joins we can reuse. It
        can be None in which case all joins are reusable or a set of aliases
        that can be reused. Note that non-reverse foreign keys are always
        reusable when using setup_joins().

        The 'reuse_with_filtered_relation' can be used to force 'can_reuse'
        parameter and force the relation on the given connections.

        If 'allow_many' is False, then any reverse foreign key seen will
        generate a MultiJoin exception.

        Return the final field involved in the joins, the target field (used
        for any 'where' constraint), the final 'opts' value, the joins, the
        field path traveled to generate the joins, and a transform function
        that takes a field and alias and is equivalent to `field.get_col(alias)`
        in the simple case but wraps field transforms if they were included in
        names.

        The target field is the field containing the concrete value. Final
        field can be something different, for example foreign key pointing to
        that value. Final field is needed for example in some value
        conversions (convert 'obj' in fk__id=obj to pk val using the foreign
        key field for example).
        """
        joins = [alias]
        # The transform can't be applied yet, as joins must be trimmed later.
        # To avoid making every caller of this method look up transforms
        # directly, compute transforms here and create a partial that converts
        # fields to the appropriate wrapped version.

        def final_transformer(field, alias):
            if not self.alias_cols:
                alias = None
            return field.get_col(alias)

        # Try resolving all the names as fields first. If there's an error,
        # treat trailing names as lookups until a field can be resolved.
        last_field_exception = None
        for pivot in range(len(names), 0, -1):
            try:
                path, final_field, targets, rest = self.names_to_path(
                    names[:pivot],
                    opts,
                    allow_many,
                    fail_on_missing=True,
                )
            except FieldError as exc:
                if pivot == 1:
                    # The first item cannot be a lookup, so it's safe
                    # to raise the field error here.
                    raise
                else:
                    # Remember the error; it is re-raised later if the
                    # trailing names turn out not to be valid transforms.
                    last_field_exception = exc
            else:
                # The transforms are the remaining items that couldn't be
                # resolved into fields.
                transforms = names[pivot:]
                break
        for name in transforms:
            # Each transform wraps the previous transformer. `name` and
            # `previous` are bound eagerly through functools.partial below,
            # avoiding the late-binding closure pitfall.

            def transform(field, alias, *, name, previous):
                try:
                    wrapped = previous(field, alias)
                    return self.try_transform(wrapped, name)
                except FieldError:
                    # FieldError is raised if the transform doesn't exist.
                    if isinstance(final_field, Field) and last_field_exception:
                        raise last_field_exception
                    else:
                        raise

            final_transformer = functools.partial(
                transform, name=name, previous=final_transformer
            )
            final_transformer.has_transforms = True
        # Then, add the path to the query's joins. Note that we can't trim
        # joins at this stage - we will need the information about join type
        # of the trimmed joins.
        for join in path:
            if join.filtered_relation:
                # Clone so per-query state (alias, path) doesn't leak back
                # into the registered FilteredRelation.
                filtered_relation = join.filtered_relation.clone()
                table_alias = filtered_relation.alias
            else:
                filtered_relation = None
                table_alias = None
            opts = join.to_opts
            if join.direct:
                nullable = self.is_nullable(join.join_field)
            else:
                # Reverse joins are always treated as nullable: the related
                # row may not exist.
                nullable = True
            connection = self.join_class(
                opts.db_table,
                alias,
                table_alias,
                INNER,
                join.join_field,
                nullable,
                filtered_relation=filtered_relation,
            )
            # Only reverse (m2m) joins consult can_reuse, unless reuse is
            # forced for a filtered relation.
            reuse = can_reuse if join.m2m or reuse_with_filtered_relation else None
            alias = self.join(
                connection,
                reuse=reuse,
                reuse_with_filtered_relation=reuse_with_filtered_relation,
            )
            joins.append(alias)
            if filtered_relation:
                # Record the full alias path for later SQL generation.
                filtered_relation.path = joins[:]
        return JoinInfo(final_field, targets, opts, joins, path, final_transformer)
1778
    def trim_joins(self, targets, joins, path):
        """
        The 'target' parameter is the final field being joined to, 'joins'
        is the full list of join aliases. The 'path' contains the PathInfos
        used to create the joins.

        Return the final target field and table alias and the new active
        joins.

        Always trim any direct join if the target column is already in the
        previous table. Can't trim reverse joins as it's unknown if there's
        anything on the other side of the join.
        """
        joins = joins[:]
        # Walk the path from the far end back towards the base table,
        # removing one join per iteration while it is provably redundant.
        for pos, info in enumerate(reversed(path)):
            if len(joins) == 1 or not info.direct:
                break
            if info.filtered_relation:
                # A filtered relation's condition lives on its join, so the
                # join must be kept; stop trimming here.
                break
            join_targets = {t.column for t in info.join_field.foreign_related_fields}
            cur_targets = {t.column for t in targets}
            if not cur_targets.issubset(join_targets):
                # The needed columns are not all available on the previous
                # table; the join cannot be trimmed.
                break
            # Remap each target to the equivalent column on the near side of
            # the join, then drop the now-unused alias.
            targets_dict = {
                r[1].column: r[0]
                for r in info.join_field.related_fields
                if r[1].column in cur_targets
            }
            targets = tuple(targets_dict[t.column] for t in targets)
            self.unref_alias(joins.pop())
        return targets, joins[-1], joins
1810
1811    @classmethod
1812    def _gen_cols(cls, exprs, include_external=False, resolve_refs=True):
1813        for expr in exprs:
1814            if isinstance(expr, Col):
1815                yield expr
1816            elif include_external and callable(
1817                getattr(expr, "get_external_cols", None)
1818            ):
1819                yield from expr.get_external_cols()
1820            elif hasattr(expr, "get_source_expressions"):
1821                if not resolve_refs and isinstance(expr, Ref):
1822                    continue
1823                yield from cls._gen_cols(
1824                    expr.get_source_expressions(),
1825                    include_external=include_external,
1826                    resolve_refs=resolve_refs,
1827                )
1828
1829    @classmethod
1830    def _gen_col_aliases(cls, exprs):
1831        yield from (expr.alias for expr in cls._gen_cols(exprs))
1832
    def resolve_ref(self, name, allow_joins=True, reuse=None, summarize=False):
        """
        Resolve 'name' (a lookup string) into an expression: an existing
        annotation, a transform applied to one, or a column reached by
        setting up joins through related fields.
        """
        annotation = self.annotations.get(name)
        if annotation is not None:
            if not allow_joins:
                # Reject annotations whose columns live on joined tables when
                # joins are not permitted.
                for alias in self._gen_col_aliases([annotation]):
                    if isinstance(self.alias_map[alias], Join):
                        raise FieldError(
                            "Joined field references are not permitted in this query"
                        )
            if summarize:
                # Summarize currently means we are doing an aggregate() query
                # which is executed as a wrapped subquery if any of the
                # aggregate() elements reference an existing annotation. In
                # that case we need to return a Ref to the subquery's annotation.
                if name not in self.annotation_select:
                    raise FieldError(
                        f"Cannot aggregate over the '{name}' alias. Use annotate() "
                        "to promote it."
                    )
                return Ref(name, self.annotation_select[name])
            else:
                return annotation
        else:
            field_list = name.split(LOOKUP_SEP)
            # The first part may still name an annotation with trailing
            # transforms (e.g. "myannotation__lower").
            annotation = self.annotations.get(field_list[0])
            if annotation is not None:
                for transform in field_list[1:]:
                    annotation = self.try_transform(annotation, transform)
                return annotation
            join_info = self.setup_joins(
                field_list, self.get_meta(), self.get_initial_alias(), can_reuse=reuse
            )
            targets, final_alias, join_list = self.trim_joins(
                join_info.targets, join_info.joins, join_info.path
            )
            if not allow_joins and len(join_list) > 1:
                raise FieldError(
                    "Joined field references are not permitted in this query"
                )
            if len(targets) > 1:
                raise FieldError(
                    "Referencing multicolumn fields with F() objects isn't supported"
                )
            # Verify that the last lookup in name is a field or a transform:
            # transform_function() raises FieldError if not.
            transform = join_info.transform_function(targets[0], final_alias)
            if reuse is not None:
                # Expose the joins created here for reuse by later filters.
                reuse.update(join_list)
            return transform
1882
    def split_exclude(self, filter_expr, can_reuse, names_with_path):
        """
        When doing an exclude against any kind of N-to-many relation, we need
        to use a subquery. This method constructs the nested query, given the
        original exclude filter (filter_expr) and the portion up to the first
        N-to-many relation field.

        For example, if the origin filter is ~Q(child__name='foo'), filter_expr
        is ('child__name', 'foo') and can_reuse is a set of joins usable for
        filters in the original query.

        We will turn this into equivalent of:
            WHERE NOT EXISTS(
                SELECT 1
                FROM child
                WHERE name = 'foo' AND child.parent_id = parent.id
                LIMIT 1
            )
        """
        # Generate the inner query.
        query = self.__class__(self.model)
        query._filtered_relations = self._filtered_relations
        filter_lhs, filter_rhs = filter_expr
        if isinstance(filter_rhs, OuterRef):
            # Re-wrap: the reference now lives one subquery level deeper.
            filter_rhs = OuterRef(filter_rhs)
        elif isinstance(filter_rhs, F):
            # An F() in the outer query becomes an OuterRef in the subquery.
            filter_rhs = OuterRef(filter_rhs.name)
        query.add_filter(filter_lhs, filter_rhs)
        query.clear_ordering(force=True)
        # Try to have as simple as possible subquery -> trim leading joins from
        # the subquery.
        trimmed_prefix, contains_louter = query.trim_start(names_with_path)

        col = query.select[0]
        select_field = col.target
        alias = col.alias
        if alias in can_reuse:
            pk = select_field.model._meta.pk
            # Need to add a restriction so that outer query's filters are in effect for
            # the subquery, too.
            query.bump_prefix(self)
            lookup_class = select_field.get_lookup("exact")
            # Note that the query.select[0].alias is different from alias
            # due to bump_prefix above.
            lookup = lookup_class(pk.get_col(query.select[0].alias), pk.get_col(alias))
            query.where.add(lookup, AND)
            query.external_aliases[alias] = True

        lookup_class = select_field.get_lookup("exact")
        # Correlate the subquery's selected column with the trimmed prefix
        # of the outer query.
        lookup = lookup_class(col, ResolvedOuterRef(trimmed_prefix))
        query.where.add(lookup, AND)
        condition, needed_inner = self.build_filter(Exists(query))

        if contains_louter:
            # A LEFT OUTER join in the trimmed prefix means NULL rows could
            # be lost; explicitly re-admit them with an OR IS NULL branch.
            or_null_condition, _ = self.build_filter(
                (f"{trimmed_prefix}__isnull", True),
                current_negated=True,
                branch_negated=True,
                can_reuse=can_reuse,
            )
            condition.add(or_null_condition, OR)
            # Note that the end result will be:
            # (outercol NOT IN innerq AND outercol IS NOT NULL) OR outercol IS NULL.
            # This might look crazy but due to how IN works, this seems to be
            # correct. If the IS NOT NULL check is removed then outercol NOT
            # IN will return UNKNOWN. If the IS NULL check is removed, then if
            # outercol IS NULL we will not match the row.
        return condition, needed_inner
1951
1952    def set_empty(self):
1953        self.where.add(NothingNode(), AND)
1954        for query in self.combined_queries:
1955            query.set_empty()
1956
1957    def is_empty(self):
1958        return any(isinstance(c, NothingNode) for c in self.where.children)
1959
1960    def set_limits(self, low=None, high=None):
1961        """
1962        Adjust the limits on the rows retrieved. Use low/high to set these,
1963        as it makes it more Pythonic to read and write. When the SQL query is
1964        created, convert them to the appropriate offset and limit values.
1965
1966        Apply any limits passed in here to the existing constraints. Add low
1967        to the current low value and clamp both to any existing high value.
1968        """
1969        if high is not None:
1970            if self.high_mark is not None:
1971                self.high_mark = min(self.high_mark, self.low_mark + high)
1972            else:
1973                self.high_mark = self.low_mark + high
1974        if low is not None:
1975            if self.high_mark is not None:
1976                self.low_mark = min(self.high_mark, self.low_mark + low)
1977            else:
1978                self.low_mark = self.low_mark + low
1979
1980        if self.low_mark == self.high_mark:
1981            self.set_empty()
1982
1983    def clear_limits(self):
1984        """Clear any existing limits."""
1985        self.low_mark, self.high_mark = 0, None
1986
1987    @property
1988    def is_sliced(self):
1989        return self.low_mark != 0 or self.high_mark is not None
1990
1991    def has_limit_one(self):
1992        return self.high_mark is not None and (self.high_mark - self.low_mark) == 1
1993
1994    def can_filter(self):
1995        """
1996        Return True if adding filters to this instance is still possible.
1997
1998        Typically, this means no limits or offsets have been put on the results.
1999        """
2000        return not self.is_sliced
2001
2002    def clear_select_clause(self):
2003        """Remove all fields from SELECT clause."""
2004        self.select = ()
2005        self.default_cols = False
2006        self.select_related = False
2007        self.set_extra_mask(())
2008        self.set_annotation_mask(())
2009
2010    def clear_select_fields(self):
2011        """
2012        Clear the list of fields to select (but not extra_select columns).
2013        Some queryset types completely replace any existing list of select
2014        columns.
2015        """
2016        self.select = ()
2017        self.values_select = ()
2018
2019    def add_select_col(self, col, name):
2020        self.select += (col,)
2021        self.values_select += (name,)
2022
2023    def set_select(self, cols):
2024        self.default_cols = False
2025        self.select = tuple(cols)
2026
2027    def add_distinct_fields(self, *field_names):
2028        """
2029        Add and resolve the given fields to the query's "distinct on" clause.
2030        """
2031        self.distinct_fields = field_names
2032        self.distinct = True
2033
    def add_fields(self, field_names, allow_m2m=True):
        """
        Add the given (model) fields to the select set. Add the field names in
        the order specified.

        Raise FieldError when a name cannot be resolved or (with
        allow_m2m=False) when it crosses a many-valued relation.
        """
        alias = self.get_initial_alias()
        opts = self.get_meta()

        try:
            cols = []
            for name in field_names:
                # Join promotion note - we must not remove any rows here, so
                # if there is no existing joins, use outer join.
                join_info = self.setup_joins(
                    name.split(LOOKUP_SEP), opts, alias, allow_many=allow_m2m
                )
                targets, final_alias, joins = self.trim_joins(
                    join_info.targets,
                    join_info.joins,
                    join_info.path,
                )
                for target in targets:
                    cols.append(join_info.transform_function(target, final_alias))
            if cols:
                self.set_select(cols)
        except MultiJoin:
            # `name` is still bound to the entry that crossed the
            # many-valued relation.
            raise FieldError(f"Invalid field name: '{name}'")
        except FieldError:
            if LOOKUP_SEP in name:
                # For lookups spanning over relationships, show the error
                # from the model on which the lookup failed.
                raise
            elif name in self.annotations:
                raise FieldError(
                    f"Cannot select the '{name}' alias. Use annotate() to promote it."
                )
            else:
                # List every valid choice to make the error actionable.
                names = sorted(
                    [
                        *get_field_names_from_opts(opts),
                        *self.extra,
                        *self.annotation_select,
                        *self._filtered_relations,
                    ]
                )
                raise FieldError(
                    "Cannot resolve keyword {!r} into field. Choices are: {}".format(
                        name, ", ".join(names)
                    )
                )
2084
    def add_ordering(self, *ordering):
        """
        Add items from the 'ordering' sequence to the query's "order by"
        clause. These items are either field names (not column names) --
        possibly with a direction prefix ('-' or '?') -- or OrderBy
        expressions.

        If 'ordering' is empty, clear all ordering from the query.
        """
        errors = []
        for item in ordering:
            if isinstance(item, str):
                if item == "?":
                    # Random ordering needs no validation.
                    continue
                item = item.removeprefix("-")
                if item in self.annotations:
                    continue
                if self.extra and item in self.extra:
                    continue
                # names_to_path() validates the lookup. A descriptive
                # FieldError will be raised if it's not.
                self.names_to_path(item.split(LOOKUP_SEP), self.model._meta)
            elif not hasattr(item, "resolve_expression"):
                # Neither a string nor an expression: invalid argument.
                errors.append(item)
            if getattr(item, "contains_aggregate", False):
                raise FieldError(
                    "Using an aggregate in order_by() without also including "
                    f"it in annotate() is not allowed: {item}"
                )
        if errors:
            raise FieldError(f"Invalid order_by arguments: {errors}")
        if ordering:
            self.order_by += ordering
        else:
            # order_by() with no arguments disables the default ordering.
            self.default_ordering = False
2120
2121    def clear_ordering(self, force=False, clear_default=True):
2122        """
2123        Remove any ordering settings if the current query allows it without
2124        side effects, set 'force' to True to clear the ordering regardless.
2125        If 'clear_default' is True, there will be no ordering in the resulting
2126        query (not even the model's default).
2127        """
2128        if not force and (
2129            self.is_sliced or self.distinct_fields or self.select_for_update
2130        ):
2131            return
2132        self.order_by = ()
2133        self.extra_order_by = ()
2134        if clear_default:
2135            self.default_ordering = False
2136
2137    def set_group_by(self, allow_aliases=True):
2138        """
2139        Expand the GROUP BY clause required by the query.
2140
2141        This will usually be the set of all non-aggregate fields in the
2142        return data. If the database backend supports grouping by the
2143        primary key, and the query would be equivalent, the optimization
2144        will be made automatically.
2145        """
2146        if allow_aliases and self.values_select:
2147            # If grouping by aliases is allowed assign selected value aliases
2148            # by moving them to annotations.
2149            group_by_annotations = {}
2150            values_select = {}
2151            for alias, expr in zip(self.values_select, self.select):
2152                if isinstance(expr, Col):
2153                    values_select[alias] = expr
2154                else:
2155                    group_by_annotations[alias] = expr
2156            self.annotations = {**group_by_annotations, **self.annotations}
2157            self.append_annotation_mask(group_by_annotations)
2158            self.select = tuple(values_select.values())
2159            self.values_select = tuple(values_select)
2160        group_by = list(self.select)
2161        for alias, annotation in self.annotation_select.items():
2162            if not (group_by_cols := annotation.get_group_by_cols()):
2163                continue
2164            if allow_aliases and not annotation.contains_aggregate:
2165                group_by.append(Ref(alias, annotation))
2166            else:
2167                group_by.extend(group_by_cols)
2168        self.group_by = tuple(group_by)
2169
2170    def add_select_related(self, fields):
2171        """
2172        Set up the select_related data structure so that we only select
2173        certain related models (as opposed to all models, when
2174        self.select_related=True).
2175        """
2176        if isinstance(self.select_related, bool):
2177            field_dict = {}
2178        else:
2179            field_dict = self.select_related
2180        for field in fields:
2181            d = field_dict
2182            for part in field.split(LOOKUP_SEP):
2183                d = d.setdefault(part, {})
2184        self.select_related = field_dict
2185
2186    def add_extra(self, select, select_params, where, params, tables, order_by):
2187        """
2188        Add data to the various extra_* attributes for user-created additions
2189        to the query.
2190        """
2191        if select:
2192            # We need to pair any placeholder markers in the 'select'
2193            # dictionary with their parameters in 'select_params' so that
2194            # subsequent updates to the select dictionary also adjust the
2195            # parameters appropriately.
2196            select_pairs = {}
2197            if select_params:
2198                param_iter = iter(select_params)
2199            else:
2200                param_iter = iter([])
2201            for name, entry in select.items():
2202                self.check_alias(name)
2203                entry = str(entry)
2204                entry_params = []
2205                pos = entry.find("%s")
2206                while pos != -1:
2207                    if pos == 0 or entry[pos - 1] != "%":
2208                        entry_params.append(next(param_iter))
2209                    pos = entry.find("%s", pos + 2)
2210                select_pairs[name] = (entry, entry_params)
2211            self.extra.update(select_pairs)
2212        if where or params:
2213            self.where.add(ExtraWhere(where, params), AND)
2214        if tables:
2215            self.extra_tables += tuple(tables)
2216        if order_by:
2217            self.extra_order_by = order_by
2218
2219    def clear_deferred_loading(self):
2220        """Remove any fields from the deferred loading set."""
2221        self.deferred_loading = (frozenset(), True)
2222
2223    def add_deferred_loading(self, field_names):
2224        """
2225        Add the given list of model field names to the set of fields to
2226        exclude from loading from the database when automatic column selection
2227        is done. Add the new field names to any existing field names that
2228        are deferred (or removed from any existing field names that are marked
2229        as the only ones for immediate loading).
2230        """
2231        # Fields on related models are stored in the literal double-underscore
2232        # format, so that we can use a set datastructure. We do the foo__bar
2233        # splitting and handling when computing the SQL column names (as part of
2234        # get_columns()).
2235        existing, defer = self.deferred_loading
2236        if defer:
2237            # Add to existing deferred names.
2238            self.deferred_loading = existing.union(field_names), True
2239        else:
2240            # Remove names from the set of any existing "immediate load" names.
2241            if new_existing := existing.difference(field_names):
2242                self.deferred_loading = new_existing, False
2243            else:
2244                self.clear_deferred_loading()
2245                if new_only := set(field_names).difference(existing):
2246                    self.deferred_loading = new_only, True
2247
2248    def add_immediate_loading(self, field_names):
2249        """
2250        Add the given list of model field names to the set of fields to
2251        retrieve when the SQL is executed ("immediate loading" fields). The
2252        field names replace any existing immediate loading field names. If
2253        there are field names already specified for deferred loading, remove
2254        those names from the new field_names before storing the new names
2255        for immediate loading. (That is, immediate loading overrides any
2256        existing immediate values, but respects existing deferrals.)
2257        """
2258        existing, defer = self.deferred_loading
2259        field_names = set(field_names)
2260        if "pk" in field_names:
2261            field_names.remove("pk")
2262            field_names.add(self.get_meta().pk.name)
2263
2264        if defer:
2265            # Remove any existing deferred names from the current set before
2266            # setting the new names.
2267            self.deferred_loading = field_names.difference(existing), False
2268        else:
2269            # Replace any existing "immediate load" field names.
2270            self.deferred_loading = frozenset(field_names), False
2271
2272    def set_annotation_mask(self, names):
2273        """Set the mask of annotations that will be returned by the SELECT."""
2274        if names is None:
2275            self.annotation_select_mask = None
2276        else:
2277            self.annotation_select_mask = set(names)
2278        self._annotation_select_cache = None
2279
2280    def append_annotation_mask(self, names):
2281        if self.annotation_select_mask is not None:
2282            self.set_annotation_mask(self.annotation_select_mask.union(names))
2283
2284    def set_extra_mask(self, names):
2285        """
2286        Set the mask of extra select items that will be returned by SELECT.
2287        Don't remove them from the Query since they might be used later.
2288        """
2289        if names is None:
2290            self.extra_select_mask = None
2291        else:
2292            self.extra_select_mask = set(names)
2293        self._extra_select_cache = None
2294
    def set_values(self, fields):
        """
        Configure the query for values()/values_list() selection of the
        given field, extra, and annotation names.

        With a falsy 'fields', every concrete model field is selected.
        An existing GROUP BY is rewritten so it stays consistent with the
        new SELECT clause.
        """
        self.select_related = False
        self.clear_deferred_loading()
        self.clear_select_fields()
        self.has_select_fields = True

        if fields:
            field_names = []
            extra_names = []
            annotation_names = []
            if not self.extra and not self.annotations:
                # Shortcut - if there are no extra or annotations, then
                # the values() clause must be just field names.
                field_names = list(fields)
            else:
                # Split the requested names by what they resolve to:
                # extra select entries, annotations, or model fields.
                self.default_cols = False
                for f in fields:
                    if f in self.extra_select:
                        extra_names.append(f)
                    elif f in self.annotation_select:
                        annotation_names.append(f)
                    else:
                        field_names.append(f)
            self.set_extra_mask(extra_names)
            self.set_annotation_mask(annotation_names)
            selected = frozenset(field_names + extra_names + annotation_names)
        else:
            field_names = [f.attname for f in self.model._meta.concrete_fields]
            selected = frozenset(field_names)
        # Selected annotations must be known before setting the GROUP BY
        # clause.
        if self.group_by is True:
            self.add_fields(
                (f.attname for f in self.model._meta.concrete_fields), False
            )
            # Disable GROUP BY aliases to avoid orphaning references to the
            # SELECT clause which is about to be cleared.
            self.set_group_by(allow_aliases=False)
            self.clear_select_fields()
        elif self.group_by:
            # Resolve GROUP BY annotation references if they are not part of
            # the selected fields anymore.
            group_by = []
            for expr in self.group_by:
                if isinstance(expr, Ref) and expr.refs not in selected:
                    expr = self.annotations[expr.refs]
                group_by.append(expr)
            self.group_by = tuple(group_by)

        self.values_select = tuple(field_names)
        self.add_fields(field_names, True)
2346
2347    @property
2348    def annotation_select(self):
2349        """
2350        Return the dictionary of aggregate columns that are not masked and
2351        should be used in the SELECT clause. Cache this result for performance.
2352        """
2353        if self._annotation_select_cache is not None:
2354            return self._annotation_select_cache
2355        elif not self.annotations:
2356            return {}
2357        elif self.annotation_select_mask is not None:
2358            self._annotation_select_cache = {
2359                k: v
2360                for k, v in self.annotations.items()
2361                if k in self.annotation_select_mask
2362            }
2363            return self._annotation_select_cache
2364        else:
2365            return self.annotations
2366
2367    @property
2368    def extra_select(self):
2369        if self._extra_select_cache is not None:
2370            return self._extra_select_cache
2371        if not self.extra:
2372            return {}
2373        elif self.extra_select_mask is not None:
2374            self._extra_select_cache = {
2375                k: v for k, v in self.extra.items() if k in self.extra_select_mask
2376            }
2377            return self._extra_select_cache
2378        else:
2379            return self.extra
2380
    def trim_start(self, names_with_path):
        """
        Trim joins from the start of the join path. The candidates for trim
        are the PathInfos in names_with_path structure that are m2m joins.

        Also set the select column so the start matches the join.

        This method is meant to be used for generating the subquery joins &
        cols in split_exclude().

        Return a lookup usable for doing outerq.filter(lookup=self) and a
        boolean indicating if the joins in the prefix contain a LEFT OUTER join.
        """
        all_paths = []
        for _, paths in names_with_path:
            all_paths.extend(paths)
        contains_louter = False
        # Trim and operate only on tables that were generated for
        # the lookup part of the query. That is, avoid trimming
        # joins generated for F() expressions.
        lookup_tables = [
            t for t in self.alias_map if t in self._lookup_joins or t == self.base_table
        ]
        # NOTE(review): trimmed_paths and path deliberately leak out of this
        # loop; the code below relies on their values at the break (or last
        # iteration). Assumes all_paths is non-empty — TODO confirm callers
        # guarantee this.
        for trimmed_paths, path in enumerate(all_paths):
            if path.m2m:
                break
            if self.alias_map[lookup_tables[trimmed_paths + 1]].join_type == LOUTER:
                contains_louter = True
            alias = lookup_tables[trimmed_paths]
            self.unref_alias(alias)
        # The path.join_field is a Rel, lets get the other side's field
        join_field = path.join_field.field
        # Build the filter prefix: keep names whose full path fits inside
        # the trimmed portion.
        paths_in_prefix = trimmed_paths
        trimmed_prefix = []
        for name, path in names_with_path:
            if paths_in_prefix - len(path) < 0:
                break
            trimmed_prefix.append(name)
            paths_in_prefix -= len(path)
        trimmed_prefix.append(join_field.foreign_related_fields[0].name)
        trimmed_prefix = LOOKUP_SEP.join(trimmed_prefix)
        # Lets still see if we can trim the first join from the inner query
        # (that is, self). We can't do this for:
        # - LEFT JOINs because we would miss those rows that have nothing on
        #   the outer side,
        # - INNER JOINs from filtered relations because we would miss their
        #   filters.
        first_join = self.alias_map[lookup_tables[trimmed_paths + 1]]
        if first_join.join_type != LOUTER and not first_join.filtered_relation:
            select_fields = [r[0] for r in join_field.related_fields]
            select_alias = lookup_tables[trimmed_paths + 1]
            self.unref_alias(lookup_tables[trimmed_paths])
            extra_restriction = join_field.get_extra_restriction(
                None, lookup_tables[trimmed_paths + 1]
            )
            if extra_restriction:
                self.where.add(extra_restriction, AND)
        else:
            # TODO: It might be possible to trim more joins from the start of the
            # inner query if it happens to have a longer join chain containing the
            # values in select_fields. Lets punt this one for now.
            select_fields = [r[1] for r in join_field.related_fields]
            select_alias = lookup_tables[trimmed_paths]
        # The found starting point is likely a join_class instead of a
        # base_table_class reference. But the first entry in the query's FROM
        # clause must not be a JOIN.
        for table in self.alias_map:
            if self.alias_refcount[table] > 0:
                self.alias_map[table] = self.base_table_class(
                    self.alias_map[table].table_name,
                    table,
                )
                break
        self.set_select([f.get_col(select_alias) for f in select_fields])
        return trimmed_prefix, contains_louter
2457
2458    def is_nullable(self, field):
2459        """Check if the given field should be treated as nullable."""
2460        # QuerySet does not have knowledge of which connection is going to be
2461        # used. For the single-database setup we always reference the default
2462        # connection here.
2463        return field.allow_null
2464
2465
def get_order_dir(field, default="ASC"):
    """
    Split an order specification into (field name, direction); e.g. '-foo'
    is returned as ('foo', 'DESC').

    'default' names the direction implied by no prefix (or a '+' prefix);
    a '-' prefix always sorts the opposite way.
    """
    same, opposite = ORDER_DIR[default]
    if field[0] == "-":
        return field[1:], opposite
    return field, same
2478
2479
class JoinPromoter:
    """
    Abstract away join-promotion decisions for complex filter conditions.

    Collects per-alias "votes" from the children of a filter node, then
    promotes/demotes joins so the query stays correct while using INNER
    joins wherever that cannot drop valid rows.
    """

    def __init__(self, connector, num_children, negated):
        self.connector = connector
        self.negated = negated
        # For join promotion, NOT (a AND b) behaves like (NOT a OR NOT b),
        # so a negated node uses the opposite connector.
        if negated:
            self.effective_connector = OR if connector == AND else AND
        else:
            self.effective_connector = connector
        self.num_children = num_children
        # Alias -> number of children that require the join as INNER/OUTER.
        self.votes = Counter()

    def __repr__(self):
        return (
            f"{self.__class__.__qualname__}(connector={self.connector!r}, "
            f"num_children={self.num_children!r}, negated={self.negated!r})"
        )

    def add_votes(self, votes):
        """Register a single vote per alias in the given iterable."""
        self.votes.update(votes)

    def update_join_types(self, query):
        """
        Change join types on 'query' so the generated SQL is as efficient
        as possible while staying correct: demote to INNER wherever safe,
        but never make an OUTER join INNER when that could remove results.
        """
        to_promote = set()
        to_demote = set()
        for table, votes in self.votes.items():
            # OR: a join not required by every child must stay OUTER,
            # because the INNER JOIN itself could filter out rows another
            # child would match. E.g. rel_a__col=1 | rel_b__col=2 where
            # rel_a yields no row (reverse FK or null direct FK) but rel_b
            # has a row with col=2 — an INNER join to rel_a would drop that
            # valid match. (The promotion may be demoted again later.)
            if self.effective_connector == OR and votes < self.num_children:
                to_promote.add(table)
            # AND: any child that needs the join forces INNER, since NULL
            # join output can never satisfy that child's condition
            # (NULL = anything is always false). For OR, a unanimous vote
            # also allows INNER: if every child needs the join and it
            # produces no rows, no child can match. E.g.
            #     (rel_a__col__icontains=Alex | rel_a__col__icontains=Russell)
            if self.effective_connector == AND or (
                self.effective_connector == OR and votes == self.num_children
            ):
                to_demote.add(table)
            # A table can land in both sets. For
            #    (rel_a__col=1 | rel_b__col=2) & rel_a__col__gte=0
            # the OR clause first promotes rel_a and rel_b to LOUTER, then
            # the AND-level rel_a__col__gte=0 vote demotes rel_a back to
            # INNER (one vote suffices under AND). That is safe: if rel_a
            # yields no rows, the __gte clause — and thus the whole
            # condition — is false, so INNER cannot lose valid results.
            # The same holds with the __gte clause and the OR clause
            # swapped, or with the __gte clause replaced by
            # rel_a__col=1 | rel_a__col=2.
        query.promote_joins(to_promote)
        query.demote_joins(to_demote)
        return to_demote