1"""
2Create SQL statements for QuerySets.
3
4The code in here encapsulates all of the SQL construction so that QuerySets
5themselves do not have to (and could be backed by things other than SQL
6databases). The abstraction barrier only works one way: this module has to know
7all about the internals of models in order to get the information it needs.
8"""
9
10import copy
11import difflib
12import functools
13import sys
14from collections import Counter, namedtuple
15from collections.abc import Iterator, Mapping
16from functools import cached_property
17from itertools import chain, count, product
18from string import ascii_uppercase
19
20from plain.exceptions import FieldDoesNotExist, FieldError
21from plain.models.aggregates import Count
22from plain.models.constants import LOOKUP_SEP
23from plain.models.db import NotSupportedError, db_connection
24from plain.models.expressions import (
25 BaseExpression,
26 Col,
27 Exists,
28 F,
29 OuterRef,
30 Ref,
31 ResolvedOuterRef,
32 Value,
33)
34from plain.models.fields import Field
35from plain.models.fields.related_lookups import MultiColSource
36from plain.models.lookups import Lookup
37from plain.models.query_utils import (
38 Q,
39 check_rel_lookup_compatibility,
40 refs_expression,
41)
42from plain.models.sql.constants import INNER, LOUTER, ORDER_DIR, SINGLE
43from plain.models.sql.datastructures import BaseTable, Empty, Join, MultiJoin
44from plain.models.sql.where import AND, OR, ExtraWhere, NothingNode, WhereNode
45from plain.utils.regex_helper import _lazy_re_compile
46from plain.utils.tree import Node
47
48__all__ = ["Query", "RawQuery"]
49
50# Quotation marks ('"`[]), whitespace characters, semicolons, or inline
51# SQL comments are forbidden in column aliases.
52FORBIDDEN_ALIAS_PATTERN = _lazy_re_compile(r"['`\"\]\[;\s]|--|/\*|\*/")
53
# Inspired by
55# https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
56EXPLAIN_OPTIONS_PATTERN = _lazy_re_compile(r"[\w\-]+")
57
58
def get_field_names_from_opts(opts):
    """
    Collect every addressable field name from a model's options.

    Concrete fields contribute both their ``name`` and ``attname``; other
    fields contribute only their ``name``. ``None`` yields an empty set.
    """
    if opts is None:
        return set()
    names = set()
    for field in opts.get_fields():
        names.add(field.name)
        if field.concrete:
            names.add(field.attname)
    return names
67
68
def get_children_from_q(q):
    """Yield the leaf conditions of a Q tree, flattening nested nodes."""
    for child in q.children:
        if not isinstance(child, Node):
            yield child
        else:
            yield from get_children_from_q(child)
75
76
# Describes a resolved join path: the final field reached, the target
# columns, the options they belong to, the join aliases and path taken,
# plus a callable applying any final transforms to fetched values.
JoinInfo = namedtuple(
    "JoinInfo",
    ("final_field", "targets", "opts", "joins", "path", "transform_function"),
)
81
82
class RawQuery:
    """A single raw SQL query."""

    def __init__(self, sql, params=()):
        self.sql = sql
        self.params = params
        self.cursor = None

        # Mirror a handful of attributes of a normal Query so the compiler
        # machinery can process results from this object as well.
        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
        self.extra_select = {}
        self.annotation_select = {}

    def chain(self):
        # Raw queries carry no chainable state beyond sql/params.
        return self.clone()

    def clone(self):
        return RawQuery(self.sql, params=self.params)

    def get_columns(self):
        """Return the (converted) column names of the result set."""
        if self.cursor is None:
            self._execute_query()
        to_identifier = db_connection.introspection.identifier_converter
        return [to_identifier(meta[0]) for meta in self.cursor.description]

    def __iter__(self):
        # Each iterator gets a fresh execution; caching results would trade
        # RAM for the repeated round trip.
        self._execute_query()
        if db_connection.features.can_use_chunked_reads:
            rows = self.cursor
        else:
            # Without chunked reads, materialize everything up front.
            rows = list(self.cursor)
        return iter(rows)

    def __repr__(self):
        return "<{}: {}>".format(type(self).__name__, self)

    @property
    def params_type(self):
        """dict, tuple, or None depending on how params were supplied."""
        params = self.params
        if params is None:
            return None
        if isinstance(params, Mapping):
            return dict
        return tuple

    def __str__(self):
        coerce = self.params_type
        if coerce is None:
            return self.sql
        return self.sql % coerce(self.params)

    def _execute_query(self):
        # Adapt parameters to the database, as much as possible considering
        # that the target type isn't known. See #17755.
        adapt = db_connection.ops.adapt_unknown_value
        kind = self.params_type
        if kind is None:
            params = None
        elif kind is dict:
            params = {key: adapt(value) for key, value in self.params.items()}
        elif kind is tuple:
            params = tuple(adapt(value) for value in self.params)
        else:
            raise RuntimeError(f"Unexpected params type: {kind}")

        self.cursor = db_connection.cursor()
        self.cursor.execute(self.sql, params)
151
152
# EXPLAIN configuration captured by Query.explain(): the requested output
# format plus backend-specific options.
ExplainInfo = namedtuple("ExplainInfo", ("format", "options"))
154
155
156class Query(BaseExpression):
157 """A single SQL query."""
158
    # Prefix used when generating numbered table aliases (e.g. "T2", "T3").
    alias_prefix = "T"
    empty_result_set_value = None
    # Aliases belonging to subqueries; starts as just the bare prefix.
    subq_aliases = frozenset([alias_prefix])

    # Name of the compiler class (resolved via the backend) used for SQL
    # generation; see get_compiler().
    compiler = "SQLCompiler"

    base_table_class = BaseTable
    join_class = Join

    default_cols = True
    default_ordering = True
    standard_ordering = True

    filter_is_sticky = False
    subquery = False

    # SQL-related attributes.
    # Select and related select clauses are expressions to use in the SELECT
    # clause of the query. The select is used for cases where we want to set up
    # the select clause to contain other than default fields (values(),
    # subqueries...). Note that annotations go to annotations dictionary.
    select = ()
    # The group_by attribute can have one of the following forms:
    # - None: no group by at all in the query
    # - A tuple of expressions: group by (at least) those expressions.
    #   String refs are also allowed for now.
    # - True: group by all select fields of the model
    # See compiler.get_group_by() for details.
    group_by = None
    order_by = ()
    low_mark = 0  # Used for offset/limit.
    high_mark = None  # Used for offset/limit.
    distinct = False
    distinct_fields = ()
    select_for_update = False
    select_for_update_nowait = False
    select_for_update_skip_locked = False
    select_for_update_of = ()
    select_for_no_key_update = False
    select_related = False
    has_select_fields = False
    # Arbitrary limit for select_related to prevent infinite recursion.
    max_depth = 5
    # Holds the selects defined by a call to values() or values_list()
    # excluding annotation_select and extra_select.
    values_select = ()

    # SQL annotation-related attributes.
    annotation_select_mask = None
    _annotation_select_cache = None

    # Set combination attributes (UNION/INTERSECT/EXCEPT).
    combinator = None
    combinator_all = False
    combined_queries = ()

    # These are for extensions. The contents are more or less appended verbatim
    # to the appropriate clause.
    extra_select_mask = None
    _extra_select_cache = None

    extra_tables = ()
    extra_order_by = ()

    # A tuple that is a set of model field names and either True, if these are
    # the fields to defer, or False if these are the only fields to load.
    deferred_loading = (frozenset(), True)

    explain_info = None
228
    def __init__(self, model, alias_cols=True):
        """
        Initialize an empty query for ``model``.

        ``alias_cols`` controls whether columns are qualified with a table
        alias when references are resolved.
        """
        self.model = model
        self.alias_refcount = {}
        # alias_map is the most important data structure regarding joins.
        # It's used for recording which joins exist in the query and what
        # types they are. The key is the alias of the joined table (possibly
        # the table name) and the value is a Join-like object (see
        # sql.datastructures.Join for more information).
        self.alias_map = {}
        # Whether to provide alias to columns during reference resolving.
        self.alias_cols = alias_cols
        # Sometimes the query contains references to aliases in outer queries (as
        # a result of split_exclude). Correct alias quoting needs to know these
        # aliases too.
        # Map external tables to whether they are aliased.
        self.external_aliases = {}
        self.table_map = {}  # Maps table names to list of aliases.
        self.used_aliases = set()

        self.where = WhereNode()
        # Maps alias -> Annotation Expression.
        self.annotations = {}
        # These are for extensions. The contents are more or less appended
        # verbatim to the appropriate clause.
        self.extra = {}  # Maps col_alias -> (col_sql, params).

        self._filtered_relations = {}
256
257 @property
258 def output_field(self):
259 if len(self.select) == 1:
260 select = self.select[0]
261 return getattr(select, "target", None) or select.field
262 elif len(self.annotation_select) == 1:
263 return next(iter(self.annotation_select.values())).output_field
264
265 @cached_property
266 def base_table(self):
267 for alias in self.alias_map:
268 return alias
269
270 def __str__(self):
271 """
272 Return the query as a string of SQL with the parameter values
273 substituted in (use sql_with_params() to see the unsubstituted string).
274
275 Parameter values won't necessarily be quoted correctly, since that is
276 done by the database interface at execution time.
277 """
278 sql, params = self.sql_with_params()
279 return sql % params
280
281 def sql_with_params(self):
282 """
283 Return the query as an SQL string and the parameters that will be
284 substituted into the query.
285 """
286 return self.get_compiler().as_sql()
287
288 def __deepcopy__(self, memo):
289 """Limit the amount of work when a Query is deepcopied."""
290 result = self.clone()
291 memo[id(self)] = result
292 return result
293
294 def get_compiler(self, *, elide_empty=True):
295 return db_connection.ops.compiler(self.compiler)(
296 self, db_connection, elide_empty
297 )
298
299 def get_meta(self):
300 """
301 Return the Options instance (the model._meta) from which to start
302 processing. Normally, this is self.model._meta, but it can be changed
303 by subclasses.
304 """
305 if self.model:
306 return self.model._meta
307
    def clone(self):
        """
        Return a copy of the current Query. A lightweight alternative to
        deepcopy().
        """
        # Bypass __init__ by rebinding the class on an Empty instance.
        obj = Empty()
        obj.__class__ = self.__class__
        # Copy references to everything.
        obj.__dict__ = self.__dict__.copy()
        # Clone attributes that can't use shallow copy.
        obj.alias_refcount = self.alias_refcount.copy()
        obj.alias_map = self.alias_map.copy()
        obj.external_aliases = self.external_aliases.copy()
        obj.table_map = self.table_map.copy()
        obj.where = self.where.clone()
        obj.annotations = self.annotations.copy()
        if self.annotation_select_mask is not None:
            obj.annotation_select_mask = self.annotation_select_mask.copy()
        if self.combined_queries:
            obj.combined_queries = tuple(
                [query.clone() for query in self.combined_queries]
            )
        # _annotation_select_cache cannot be copied, as doing so breaks the
        # (necessary) state in which both annotations and
        # _annotation_select_cache point to the same underlying objects.
        # It will get re-populated in the cloned queryset the next time it's
        # used.
        obj._annotation_select_cache = None
        obj.extra = self.extra.copy()
        if self.extra_select_mask is not None:
            obj.extra_select_mask = self.extra_select_mask.copy()
        if self._extra_select_cache is not None:
            obj._extra_select_cache = self._extra_select_cache.copy()
        if self.select_related is not False:
            # Use deepcopy because select_related stores fields in nested
            # dicts.
            obj.select_related = copy.deepcopy(obj.select_related)
        # Only copy subq_aliases when it was set on the instance; the
        # class-level frozenset default is immutable and safe to share.
        if "subq_aliases" in self.__dict__:
            obj.subq_aliases = self.subq_aliases.copy()
        obj.used_aliases = self.used_aliases.copy()
        obj._filtered_relations = self._filtered_relations.copy()
        # Clear the cached_property, if it exists.
        obj.__dict__.pop("base_table", None)
        return obj
352
353 def chain(self, klass=None):
354 """
355 Return a copy of the current Query that's ready for another operation.
356 The klass argument changes the type of the Query, e.g. UpdateQuery.
357 """
358 obj = self.clone()
359 if klass and obj.__class__ != klass:
360 obj.__class__ = klass
361 if not obj.filter_is_sticky:
362 obj.used_aliases = set()
363 obj.filter_is_sticky = False
364 if hasattr(obj, "_setup_query"):
365 obj._setup_query()
366 return obj
367
368 def relabeled_clone(self, change_map):
369 clone = self.clone()
370 clone.change_aliases(change_map)
371 return clone
372
373 def _get_col(self, target, field, alias):
374 if not self.alias_cols:
375 alias = None
376 return target.get_col(alias, field)
377
    def get_aggregation(self, aggregate_exprs):
        """
        Return the dictionary with the values of the existing aggregations.

        ``aggregate_exprs`` maps result aliases to aggregate expressions;
        the returned dict maps those same aliases to the computed values.
        """
        if not aggregate_exprs:
            return {}
        aggregates = {}
        for alias, aggregate_expr in aggregate_exprs.items():
            self.check_alias(alias)
            aggregate = aggregate_expr.resolve_expression(
                self, allow_joins=True, reuse=None, summarize=True
            )
            if not aggregate.contains_aggregate:
                raise TypeError(f"{alias} is not an aggregate expression")
            aggregates[alias] = aggregate
        # Existing usage of aggregation can be determined by the presence of
        # selected aggregates but also by filters against aliased aggregates.
        _, having, qualify = self.where.split_having_qualify()
        has_existing_aggregation = (
            any(
                getattr(annotation, "contains_aggregate", True)
                for annotation in self.annotations.values()
            )
            or having
        )
        # Decide if we need to use a subquery.
        #
        # Existing aggregations would cause incorrect results as
        # get_aggregation() must produce just one result and thus must not use
        # GROUP BY.
        #
        # If the query has limit or distinct, or uses set operations, then
        # those operations must be done in a subquery so that the query
        # aggregates on the limit and/or distinct results instead of applying
        # the distinct and limit after the aggregation.
        if (
            isinstance(self.group_by, tuple)
            or self.is_sliced
            or has_existing_aggregation
            or qualify
            or self.distinct
            or self.combinator
        ):
            from plain.models.sql.subqueries import AggregateQuery

            inner_query = self.clone()
            inner_query.subquery = True
            outer_query = AggregateQuery(self.model, inner_query)
            inner_query.select_for_update = False
            inner_query.select_related = False
            inner_query.set_annotation_mask(self.annotation_select)
            # Queries with distinct_fields need ordering and when a limit is
            # applied we must take the slice from the ordered query. Otherwise
            # no need for ordering.
            inner_query.clear_ordering(force=False)
            if not inner_query.distinct:
                # If the inner query uses default select and it has some
                # aggregate annotations, then we must make sure the inner
                # query is grouped by the main model's primary key. However,
                # clearing the select clause can alter results if distinct is
                # used.
                if inner_query.default_cols and has_existing_aggregation:
                    inner_query.group_by = (
                        self.model._meta.get_field("id").get_col(
                            inner_query.get_initial_alias()
                        ),
                    )
                inner_query.default_cols = False
                if not qualify:
                    # Mask existing annotations that are not referenced by
                    # aggregates to be pushed to the outer query unless
                    # filtering against window functions is involved as it
                    # requires complex realising.
                    annotation_mask = set()
                    for aggregate in aggregates.values():
                        annotation_mask |= aggregate.get_refs()
                    inner_query.set_annotation_mask(annotation_mask)

            # Add aggregates to the outer AggregateQuery. This requires making
            # sure all columns referenced by the aggregates are selected in the
            # inner query. It is achieved by retrieving all column references
            # by the aggregates, explicitly selecting them in the inner query,
            # and making sure the aggregates are repointed to them.
            col_refs = {}
            for alias, aggregate in aggregates.items():
                replacements = {}
                for col in self._gen_cols([aggregate], resolve_refs=False):
                    if not (col_ref := col_refs.get(col)):
                        index = len(col_refs) + 1
                        col_alias = f"__col{index}"
                        col_ref = Ref(col_alias, col)
                        col_refs[col] = col_ref
                        inner_query.annotations[col_alias] = col
                        inner_query.append_annotation_mask([col_alias])
                    replacements[col] = col_ref
                outer_query.annotations[alias] = aggregate.replace_expressions(
                    replacements
                )
            if (
                inner_query.select == ()
                and not inner_query.default_cols
                and not inner_query.annotation_select_mask
            ):
                # In case of Model.objects[0:3].count(), there would be no
                # field selected in the inner query, yet we must use a subquery.
                # So, make sure at least one field is selected.
                inner_query.select = (
                    self.model._meta.get_field("id").get_col(
                        inner_query.get_initial_alias()
                    ),
                )
        else:
            # No subquery needed: aggregate directly over this query.
            outer_query = self
            self.select = ()
            self.default_cols = False
            self.extra = {}
            if self.annotations:
                # Inline reference to existing annotations and mask them as
                # they are unnecessary given only the summarized aggregations
                # are requested.
                replacements = {
                    Ref(alias, annotation): annotation
                    for alias, annotation in self.annotations.items()
                }
                self.annotations = {
                    alias: aggregate.replace_expressions(replacements)
                    for alias, aggregate in aggregates.items()
                }
            else:
                self.annotations = aggregates
            self.set_annotation_mask(aggregates)

        empty_set_result = [
            expression.empty_result_set_value
            for expression in outer_query.annotation_select.values()
        ]
        elide_empty = not any(result is NotImplemented for result in empty_set_result)
        outer_query.clear_ordering(force=True)
        outer_query.clear_limits()
        outer_query.select_for_update = False
        outer_query.select_related = False
        compiler = outer_query.get_compiler(elide_empty=elide_empty)
        result = compiler.execute_sql(SINGLE)
        if result is None:
            # No row returned: fall back to each expression's empty value.
            result = empty_set_result
        else:
            converters = compiler.get_converters(outer_query.annotation_select.values())
            result = next(compiler.apply_converters((result,), converters))

        return dict(zip(outer_query.annotation_select, result))
528
529 def get_count(self):
530 """
531 Perform a COUNT() query using the current filter constraints.
532 """
533 obj = self.clone()
534 return obj.get_aggregation({"__count": Count("*")})["__count"]
535
    def has_filters(self):
        # Return the where clause itself; callers rely on its truthiness to
        # answer "does this query have any filters?".
        return self.where
538
    def exists(self, limit=True):
        """
        Return a trimmed clone of this query suitable for an EXISTS-style
        check: the select clause is cleared, ordering dropped, a constant
        ``1`` is selected as ``"a"``, and (when ``limit``) LIMIT 1 applied.
        """
        q = self.clone()
        if not (q.distinct and q.is_sliced):
            if q.group_by is True:
                q.add_fields(
                    (f.attname for f in self.model._meta.concrete_fields), False
                )
                # Disable GROUP BY aliases to avoid orphaning references to the
                # SELECT clause which is about to be cleared.
                q.set_group_by(allow_aliases=False)
            q.clear_select_clause()
        if q.combined_queries and q.combinator == "union":
            # Each member of a UNION gets the same EXISTS treatment (without
            # the limit, which applies to the combined query only).
            q.combined_queries = tuple(
                combined_query.exists(limit=False)
                for combined_query in q.combined_queries
            )
        q.clear_ordering(force=True)
        if limit:
            q.set_limits(high=1)
        q.add_annotation(Value(1), "a")
        return q
560
561 def has_results(self):
562 q = self.exists()
563 compiler = q.get_compiler()
564 return compiler.has_results()
565
566 def explain(self, format=None, **options):
567 q = self.clone()
568 for option_name in options:
569 if (
570 not EXPLAIN_OPTIONS_PATTERN.fullmatch(option_name)
571 or "--" in option_name
572 ):
573 raise ValueError(f"Invalid option name: {option_name!r}.")
574 q.explain_info = ExplainInfo(format, options)
575 compiler = q.get_compiler()
576 return "\n".join(compiler.explain_query())
577
    def combine(self, rhs, connector):
        """
        Merge the 'rhs' query into the current one (with any 'rhs' effects
        being applied *after* (that is, "to the right of") anything in the
        current query. 'rhs' is not modified during a call to this function.

        The 'connector' parameter describes how to connect filters from the
        'rhs' query.
        """
        if self.model != rhs.model:
            raise TypeError("Cannot combine queries on two different base models.")
        if self.is_sliced:
            raise TypeError("Cannot combine queries once a slice has been taken.")
        if self.distinct != rhs.distinct:
            raise TypeError("Cannot combine a unique query with a non-unique query.")
        if self.distinct_fields != rhs.distinct_fields:
            raise TypeError("Cannot combine queries with different distinct fields.")

        # If lhs and rhs shares the same alias prefix, it is possible to have
        # conflicting alias changes like T4 -> T5, T5 -> T6, which might end up
        # as T4 -> T6 while combining two querysets. To prevent this, change an
        # alias prefix of the rhs and update current aliases accordingly,
        # except if the alias is the base table since it must be present in the
        # query on both sides.
        initial_alias = self.get_initial_alias()
        rhs.bump_prefix(self, exclude={initial_alias})

        # Work out how to relabel the rhs aliases, if necessary.
        change_map = {}
        conjunction = connector == AND

        # Determine which existing joins can be reused. When combining the
        # query with AND we must recreate all joins for m2m filters. When
        # combining with OR we can reuse joins. The reason is that in AND
        # case a single row can't fulfill a condition like:
        #     revrel__col=1 & revrel__col=2
        # But, there might be two different related rows matching this
        # condition. In OR case a single True is enough, so single row is
        # enough, too.
        #
        # Note that we will be creating duplicate joins for non-m2m joins in
        # the AND case. The results will be correct but this creates too many
        # joins. This is something that could be fixed later on.
        reuse = set() if conjunction else set(self.alias_map)
        joinpromoter = JoinPromoter(connector, 2, False)
        joinpromoter.add_votes(
            j for j in self.alias_map if self.alias_map[j].join_type == INNER
        )
        rhs_votes = set()
        # Now, add the joins from rhs query into the new query (skipping base
        # table).
        rhs_tables = list(rhs.alias_map)[1:]
        for alias in rhs_tables:
            join = rhs.alias_map[alias]
            # If the left side of the join was already relabeled, use the
            # updated alias.
            join = join.relabeled_clone(change_map)
            new_alias = self.join(join, reuse=reuse)
            if join.join_type == INNER:
                rhs_votes.add(new_alias)
            # We can't reuse the same join again in the query. If we have two
            # distinct joins for the same connection in rhs query, then the
            # combined query must have two joins, too.
            reuse.discard(new_alias)
            if alias != new_alias:
                change_map[alias] = new_alias
            if not rhs.alias_refcount[alias]:
                # The alias was unused in the rhs query. Unref it so that it
                # will be unused in the new query, too. We have to add and
                # unref the alias so that join promotion has information of
                # the join type for the unused alias.
                self.unref_alias(new_alias)
        joinpromoter.add_votes(rhs_votes)
        joinpromoter.update_join_types(self)

        # Combine subqueries aliases to ensure aliases relabelling properly
        # handle subqueries when combining where and select clauses.
        self.subq_aliases |= rhs.subq_aliases

        # Now relabel a copy of the rhs where-clause and add it to the current
        # one.
        w = rhs.where.clone()
        w.relabel_aliases(change_map)
        self.where.add(w, connector)

        # Selection columns and extra extensions are those provided by 'rhs'.
        if rhs.select:
            self.set_select([col.relabeled_clone(change_map) for col in rhs.select])
        else:
            self.select = ()

        if connector == OR:
            # It would be nice to be able to handle this, but the queries don't
            # really make sense (or return consistent value sets). Not worth
            # the extra complexity when you can write a real query instead.
            if self.extra and rhs.extra:
                raise ValueError(
                    "When merging querysets using 'or', you cannot have "
                    "extra(select=...) on both sides."
                )
        self.extra.update(rhs.extra)
        extra_select_mask = set()
        if self.extra_select_mask is not None:
            extra_select_mask.update(self.extra_select_mask)
        if rhs.extra_select_mask is not None:
            extra_select_mask.update(rhs.extra_select_mask)
        if extra_select_mask:
            self.set_extra_mask(extra_select_mask)
        self.extra_tables += rhs.extra_tables

        # Ordering uses the 'rhs' ordering, unless it has none, in which case
        # the current ordering is used.
        self.order_by = rhs.order_by or self.order_by
        self.extra_order_by = rhs.extra_order_by or self.extra_order_by
692
    def _get_defer_select_mask(self, opts, mask, select_mask=None):
        """
        Build a select mask for defer(): load every concrete field except
        those named in ``mask``, recursing into related models for nested
        defer entries. The primary key is always loaded.
        """
        if select_mask is None:
            select_mask = {}
        select_mask[opts.get_field("id")] = {}
        # All concrete fields that are not part of the defer mask must be
        # loaded. If a relational field is encountered it gets added to the
        # mask for it be considered if `select_related` and the cycle continues
        # by recursively calling this function.
        for field in opts.concrete_fields:
            field_mask = mask.pop(field.name, None)
            if field_mask is None:
                select_mask.setdefault(field, {})
            elif field_mask:
                if not field.is_relation:
                    raise FieldError(next(iter(field_mask)))
                field_select_mask = select_mask.setdefault(field, {})
                related_model = field.remote_field.model._meta.concrete_model
                self._get_defer_select_mask(
                    related_model._meta, field_mask, field_select_mask
                )
        # Remaining defer entries must be references to reverse relationships.
        # The following code is expected to raise FieldError if it encounters
        # a malformed defer entry.
        for field_name, field_mask in mask.items():
            if filtered_relation := self._filtered_relations.get(field_name):
                relation = opts.get_field(filtered_relation.relation_name)
                field_select_mask = select_mask.setdefault((field_name, relation), {})
                field = relation.field
            else:
                field = opts.get_field(field_name).field
                field_select_mask = select_mask.setdefault(field, {})
            related_model = field.model._meta.concrete_model
            self._get_defer_select_mask(
                related_model._meta, field_mask, field_select_mask
            )
        return select_mask
729
730 def _get_only_select_mask(self, opts, mask, select_mask=None):
731 if select_mask is None:
732 select_mask = {}
733 select_mask[opts.get_field("id")] = {}
734 # Only include fields mentioned in the mask.
735 for field_name, field_mask in mask.items():
736 field = opts.get_field(field_name)
737 field_select_mask = select_mask.setdefault(field, {})
738 if field_mask:
739 if not field.is_relation:
740 raise FieldError(next(iter(field_mask)))
741 related_model = field.remote_field.model._meta.concrete_model
742 self._get_only_select_mask(
743 related_model._meta, field_mask, field_select_mask
744 )
745 return select_mask
746
747 def get_select_mask(self):
748 """
749 Convert the self.deferred_loading data structure to an alternate data
750 structure, describing the field that *will* be loaded. This is used to
751 compute the columns to select from the database and also by the
752 QuerySet class to work out which fields are being initialized on each
753 model. Models that have all their fields included aren't mentioned in
754 the result, only those that have field restrictions in place.
755 """
756 field_names, defer = self.deferred_loading
757 if not field_names:
758 return {}
759 mask = {}
760 for field_name in field_names:
761 part_mask = mask
762 for part in field_name.split(LOOKUP_SEP):
763 part_mask = part_mask.setdefault(part, {})
764 opts = self.get_meta()
765 if defer:
766 return self._get_defer_select_mask(opts, mask)
767 return self._get_only_select_mask(opts, mask)
768
769 def table_alias(self, table_name, create=False, filtered_relation=None):
770 """
771 Return a table alias for the given table_name and whether this is a
772 new alias or not.
773
774 If 'create' is true, a new alias is always created. Otherwise, the
775 most recently created alias for the table (if one exists) is reused.
776 """
777 alias_list = self.table_map.get(table_name)
778 if not create and alias_list:
779 alias = alias_list[0]
780 self.alias_refcount[alias] += 1
781 return alias, False
782
783 # Create a new alias for this table.
784 if alias_list:
785 alias = "%s%d" % (self.alias_prefix, len(self.alias_map) + 1) # noqa: UP031
786 alias_list.append(alias)
787 else:
788 # The first occurrence of a table uses the table name directly.
789 alias = (
790 filtered_relation.alias if filtered_relation is not None else table_name
791 )
792 self.table_map[table_name] = [alias]
793 self.alias_refcount[alias] = 1
794 return alias, True
795
    def ref_alias(self, alias):
        """Increases the reference count for this alias by one."""
        self.alias_refcount[alias] += 1
799
    def unref_alias(self, alias, amount=1):
        """Decreases the reference count for this alias by ``amount``."""
        self.alias_refcount[alias] -= amount
803
    def promote_joins(self, aliases):
        """
        Promote recursively the join type of given aliases and its children to
        an outer join. A join is only promoted if it is nullable or the parent
        join is an outer join.

        The children promotion is done to avoid join chains that contain a LOUTER
        b INNER c. So, if we have currently a INNER b INNER c and a->b is promoted,
        then we must also promote b->c automatically, or otherwise the promotion
        of a->b doesn't actually change anything in the query results.
        """
        aliases = list(aliases)
        while aliases:
            alias = aliases.pop(0)
            if self.alias_map[alias].join_type is None:
                # This is the base table (first FROM entry) - this table
                # isn't really joined at all in the query, so we should not
                # alter its join type.
                continue
            # Only the first alias (skipped above) should have None join_type
            # NOTE(review): this assert is trivially true after the continue
            # above; kept as a defensive invariant check.
            assert self.alias_map[alias].join_type is not None
            parent_alias = self.alias_map[alias].parent_alias
            parent_louter = (
                parent_alias and self.alias_map[parent_alias].join_type == LOUTER
            )
            already_louter = self.alias_map[alias].join_type == LOUTER
            if (self.alias_map[alias].nullable or parent_louter) and not already_louter:
                self.alias_map[alias] = self.alias_map[alias].promote()
                # Join type of 'alias' changed, so re-examine all aliases that
                # refer to this one.
                aliases.extend(
                    join
                    for join in self.alias_map
                    if self.alias_map[join].parent_alias == alias
                    and join not in aliases
                )
840
    def demote_joins(self, aliases):
        """
        Change join type from LOUTER to INNER for all joins in aliases.

        Similarly to promote_joins(), this method must ensure no join chains
        containing first an outer, then an inner join are generated. If we
        are demoting b->c join in chain a LOUTER b LOUTER c then we must
        demote a->b automatically, or otherwise the demotion of b->c doesn't
        actually change anything in the query results.
        """
        aliases = list(aliases)
        while aliases:
            alias = aliases.pop(0)
            if self.alias_map[alias].join_type == LOUTER:
                self.alias_map[alias] = self.alias_map[alias].demote()
                # If the parent is INNER, it may now precede this demoted
                # join; demote it as well to keep the chain consistent.
                parent_alias = self.alias_map[alias].parent_alias
                if self.alias_map[parent_alias].join_type == INNER:
                    aliases.append(parent_alias)
859
860 def reset_refcounts(self, to_counts):
861 """
862 Reset reference counts for aliases so that they match the value passed
863 in `to_counts`.
864 """
865 for alias, cur_refcount in self.alias_refcount.copy().items():
866 unref_amount = cur_refcount - to_counts.get(alias, 0)
867 self.unref_alias(alias, unref_amount)
868
    def change_aliases(self, change_map):
        """
        Change the aliases in change_map (which maps old-alias -> new-alias),
        relabelling any references to them in select columns and the where
        clause.
        """
        # If keys and values of change_map were to intersect, an alias might be
        # updated twice (e.g. T4 -> T5, T5 -> T6, so also T4 -> T6) depending
        # on their order in change_map.
        assert set(change_map).isdisjoint(change_map.values())

        # 1. Update references in "select" (normal columns plus aliases),
        # "group by" and "where".
        self.where.relabel_aliases(change_map)
        if isinstance(self.group_by, tuple):
            self.group_by = tuple(
                [col.relabeled_clone(change_map) for col in self.group_by]
            )
        self.select = tuple([col.relabeled_clone(change_map) for col in self.select])
        self.annotations = self.annotations and {
            key: col.relabeled_clone(change_map)
            for key, col in self.annotations.items()
        }

        # 2. Rename the alias in the internal table/alias datastructures.
        for old_alias, new_alias in change_map.items():
            if old_alias not in self.alias_map:
                continue
            alias_data = self.alias_map[old_alias].relabeled_clone(change_map)
            self.alias_map[new_alias] = alias_data
            self.alias_refcount[new_alias] = self.alias_refcount[old_alias]
            del self.alias_refcount[old_alias]
            del self.alias_map[old_alias]

            # Swap the old alias for the new one in the per-table alias list.
            table_aliases = self.table_map[alias_data.table_name]
            for pos, alias in enumerate(table_aliases):
                if alias == old_alias:
                    table_aliases[pos] = new_alias
                    break
        self.external_aliases = {
            # Table is aliased or it's being changed and thus is aliased.
            change_map.get(alias, alias): (aliased or alias in change_map)
            for alias, aliased in self.external_aliases.items()
        }
913
    def bump_prefix(self, other_query, exclude=None):
        """
        Change the alias prefix to the next letter in the alphabet in a way
        that the other query's aliases and this query's aliases will not
        conflict. Even tables that previously had no alias will get an alias
        after this call. To prevent changing aliases use the exclude parameter.

        Raises RecursionError when all candidate prefixes (bounded by a
        fraction of the recursion limit) are already taken by subqueries.
        """

        def prefix_gen():
            """
            Generate a sequence of characters in alphabetical order:
            -> 'A', 'B', 'C', ...

            When the alphabet is finished, the sequence will continue with the
            Cartesian product:
            -> 'AA', 'AB', 'AC', ...
            """
            alphabet = ascii_uppercase
            prefix = chr(ord(self.alias_prefix) + 1)
            yield prefix
            for n in count(1):
                seq = alphabet[alphabet.index(prefix) :] if prefix else alphabet
                for s in product(seq, repeat=n):
                    yield "".join(s)
                prefix = None

        if self.alias_prefix != other_query.alias_prefix:
            # No clashes between self and outer query should be possible.
            return

        # Explicitly avoid infinite loop. The constant divider is based on how
        # much depth recursive subquery references add to the stack. This value
        # might need to be adjusted when adding or removing function calls from
        # the code path in charge of performing these operations.
        local_recursion_limit = sys.getrecursionlimit() // 16
        for pos, prefix in enumerate(prefix_gen()):
            if prefix not in self.subq_aliases:
                self.alias_prefix = prefix
                break
            if pos > local_recursion_limit:
                raise RecursionError(
                    "Maximum recursion depth exceeded: too many subqueries."
                )
        # Record the chosen prefix on both queries so later bumps skip it.
        self.subq_aliases = self.subq_aliases.union([self.alias_prefix])
        other_query.subq_aliases = other_query.subq_aliases.union(self.subq_aliases)
        if exclude is None:
            exclude = {}
        # Rename every (non-excluded) alias to "<prefix><index>".
        self.change_aliases(
            {
                alias: "%s%d" % (self.alias_prefix, pos)  # noqa: UP031
                for pos, alias in enumerate(self.alias_map)
                if alias not in exclude
            }
        )
968
969 def get_initial_alias(self):
970 """
971 Return the first alias for this query, after increasing its reference
972 count.
973 """
974 if self.alias_map:
975 alias = self.base_table
976 self.ref_alias(alias)
977 elif self.model:
978 alias = self.join(self.base_table_class(self.get_meta().db_table, None))
979 else:
980 alias = None
981 return alias
982
983 def count_active_tables(self):
984 """
985 Return the number of tables in this query with a non-zero reference
986 count. After execution, the reference counts are zeroed, so tables
987 added in compiler will not be seen by this method.
988 """
989 return len([1 for count in self.alias_refcount.values() if count])
990
    def join(self, join, reuse=None, reuse_with_filtered_relation=False):
        """
        Return an alias for the 'join', either reusing an existing alias for
        that join or creating a new one. 'join' is either a base_table_class or
        join_class.

        The 'reuse' parameter can be either None which means all joins are
        reusable, or it can be a set containing the aliases that can be reused.

        The 'reuse_with_filtered_relation' parameter is used when computing
        FilteredRelation instances.

        A join is always created as LOUTER if the lhs alias is LOUTER to make
        sure chains like t1 LOUTER t2 INNER t3 aren't generated. All new
        joins are created as LOUTER if the join is nullable.
        """
        if reuse_with_filtered_relation and reuse:
            # Filtered relations may only reuse the explicitly listed
            # aliases; equals() compares joins ignoring the filtered
            # relation itself.
            reuse_aliases = [
                a for a, j in self.alias_map.items() if a in reuse and j.equals(join)
            ]
        else:
            reuse_aliases = [
                a
                for a, j in self.alias_map.items()
                if (reuse is None or a in reuse) and j == join
            ]
        if reuse_aliases:
            if join.table_alias in reuse_aliases:
                reuse_alias = join.table_alias
            else:
                # Reuse the most recent alias of the joined table
                # (a many-to-many relation may be joined multiple times).
                reuse_alias = reuse_aliases[-1]
            self.ref_alias(reuse_alias)
            return reuse_alias

        # No reuse is possible, so we need a new alias.
        alias, _ = self.table_alias(
            join.table_name, create=True, filtered_relation=join.filtered_relation
        )
        if join.join_type:
            # Propagate LOUTER from the parent join (or start as LOUTER when
            # the join itself is nullable); otherwise use INNER.
            if self.alias_map[join.parent_alias].join_type == LOUTER or join.nullable:
                join_type = LOUTER
            else:
                join_type = INNER
            join.join_type = join_type
        join.table_alias = alias
        self.alias_map[alias] = join
        return alias
1040
    def check_alias(self, alias):
        """
        Validate a column alias, raising ValueError when it contains any
        character sequence that could break out of the alias in generated
        SQL (an injection guard).
        """
        # FORBIDDEN_ALIAS_PATTERN matches quotation marks, brackets,
        # whitespace, semicolons, and inline comment tokens (--, /*, */).
        if FORBIDDEN_ALIAS_PATTERN.search(alias):
            raise ValueError(
                "Column aliases cannot contain whitespace characters, quotation marks, "
                "semicolons, or SQL comments."
            )
1047
1048 def add_annotation(self, annotation, alias, select=True):
1049 """Add a single annotation expression to the Query."""
1050 self.check_alias(alias)
1051 annotation = annotation.resolve_expression(self, allow_joins=True, reuse=None)
1052 if select:
1053 self.append_annotation_mask([alias])
1054 else:
1055 self.set_annotation_mask(set(self.annotation_select).difference({alias}))
1056 self.annotations[alias] = annotation
1057
    def resolve_expression(self, query, *args, **kwargs):
        """
        Resolve this query for use as a subquery expression of `query`.

        Return a clone with a bumped alias prefix, resolved where clause,
        combined queries and annotations, and with the outer query's
        aliases registered as external aliases.
        """
        clone = self.clone()
        # Subqueries need to use a different set of aliases than the outer query.
        clone.bump_prefix(query)
        clone.subquery = True
        clone.where.resolve_expression(query, *args, **kwargs)
        # Resolve combined queries.
        if clone.combinator:
            clone.combined_queries = tuple(
                [
                    combined_query.resolve_expression(query, *args, **kwargs)
                    for combined_query in clone.combined_queries
                ]
            )
        for key, value in clone.annotations.items():
            resolved = value.resolve_expression(query, *args, **kwargs)
            if hasattr(resolved, "external_aliases"):
                # Nested subquery annotations must also treat this clone's
                # external aliases as external.
                resolved.external_aliases.update(clone.external_aliases)
            clone.annotations[key] = resolved
        # Outer query's aliases are considered external.
        for alias, table in query.alias_map.items():
            clone.external_aliases[alias] = (
                isinstance(table, Join)
                and table.join_field.related_model._meta.db_table != alias
            ) or (
                isinstance(table, BaseTable) and table.table_name != table.table_alias
            )
        return clone
1086
1087 def get_external_cols(self):
1088 exprs = chain(self.annotations.values(), self.where.children)
1089 return [
1090 col
1091 for col in self._gen_cols(exprs, include_external=True)
1092 if col.alias in self.external_aliases
1093 ]
1094
1095 def get_group_by_cols(self, wrapper=None):
1096 # If wrapper is referenced by an alias for an explicit GROUP BY through
1097 # values() a reference to this expression and not the self must be
1098 # returned to ensure external column references are not grouped against
1099 # as well.
1100 external_cols = self.get_external_cols()
1101 if any(col.possibly_multivalued for col in external_cols):
1102 return [wrapper or self]
1103 return external_cols
1104
    def as_sql(self, compiler, connection):
        """
        Compile this query into a (sql, params) pair for use as an
        expression. Compilation goes through self.get_compiler(); the
        `compiler` and `connection` arguments are part of the expression
        protocol signature.
        """
        # Some backends (e.g. Oracle) raise an error when a subquery contains
        # unnecessary ORDER BY clause.
        if (
            self.subquery
            and not db_connection.features.ignores_unnecessary_order_by_in_subqueries
        ):
            self.clear_ordering(force=False)
            for query in self.combined_queries:
                query.clear_ordering(force=False)
        sql, params = self.get_compiler().as_sql()
        if self.subquery:
            # Parenthesize so the subquery embeds safely in a larger statement.
            sql = f"({sql})"
        return sql, params
1119
1120 def resolve_lookup_value(self, value, can_reuse, allow_joins):
1121 if hasattr(value, "resolve_expression"):
1122 value = value.resolve_expression(
1123 self,
1124 reuse=can_reuse,
1125 allow_joins=allow_joins,
1126 )
1127 elif isinstance(value, list | tuple):
1128 # The items of the iterable may be expressions and therefore need
1129 # to be resolved independently.
1130 values = (
1131 self.resolve_lookup_value(sub_value, can_reuse, allow_joins)
1132 for sub_value in value
1133 )
1134 type_ = type(value)
1135 if hasattr(type_, "_make"): # namedtuple
1136 return type_(*values)
1137 return type_(values)
1138 return value
1139
1140 def solve_lookup_type(self, lookup, summarize=False):
1141 """
1142 Solve the lookup type from the lookup (e.g.: 'foobar__id__icontains').
1143 """
1144 lookup_splitted = lookup.split(LOOKUP_SEP)
1145 if self.annotations:
1146 annotation, expression_lookups = refs_expression(
1147 lookup_splitted, self.annotations
1148 )
1149 if annotation:
1150 expression = self.annotations[annotation]
1151 if summarize:
1152 expression = Ref(annotation, expression)
1153 return expression_lookups, (), expression
1154 _, field, _, lookup_parts = self.names_to_path(lookup_splitted, self.get_meta())
1155 field_parts = lookup_splitted[0 : len(lookup_splitted) - len(lookup_parts)]
1156 if len(lookup_parts) > 1 and not field_parts:
1157 raise FieldError(
1158 f'Invalid lookup "{lookup}" for model {self.get_meta().model.__name__}".'
1159 )
1160 return lookup_parts, field_parts, False
1161
1162 def check_query_object_type(self, value, opts, field):
1163 """
1164 Check whether the object passed while querying is of the correct type.
1165 If not, raise a ValueError specifying the wrong object.
1166 """
1167 if hasattr(value, "_meta"):
1168 if not check_rel_lookup_compatibility(value._meta.model, opts, field):
1169 raise ValueError(
1170 f'Cannot query "{value}": Must be "{opts.object_name}" instance.'
1171 )
1172
1173 def check_related_objects(self, field, value, opts):
1174 """Check the type of object passed to query relations."""
1175 if field.is_relation:
1176 # Check that the field and the queryset use the same model in a
1177 # query like .filter(author=Author.query.all()). For example, the
1178 # opts would be Author's (from the author field) and value.model
1179 # would be Author.query.all() queryset's .model (Author also).
1180 # The field is the related field on the lhs side.
1181 if (
1182 isinstance(value, Query)
1183 and not value.has_select_fields
1184 and not check_rel_lookup_compatibility(value.model, opts, field)
1185 ):
1186 raise ValueError(
1187 f'Cannot use QuerySet for "{value.model._meta.object_name}": Use a QuerySet for "{opts.object_name}".'
1188 )
1189 elif hasattr(value, "_meta"):
1190 self.check_query_object_type(value, opts, field)
1191 elif hasattr(value, "__iter__"):
1192 for v in value:
1193 self.check_query_object_type(v, opts, field)
1194
1195 def check_filterable(self, expression):
1196 """Raise an error if expression cannot be used in a WHERE clause."""
1197 if hasattr(expression, "resolve_expression") and not getattr(
1198 expression, "filterable", True
1199 ):
1200 raise NotSupportedError(
1201 expression.__class__.__name__ + " is disallowed in the filter clause."
1202 )
1203 if hasattr(expression, "get_source_expressions"):
1204 for expr in expression.get_source_expressions():
1205 self.check_filterable(expr)
1206
    def build_lookup(self, lookups, lhs, rhs):
        """
        Try to extract transforms and lookup from given lhs.

        The lhs value is something that works like SQLExpression.
        The rhs value is what the lookup is going to compare against.
        The lookups is a list of names to extract using get_lookup()
        and get_transform().

        Return a lookup instance, or None when no lookup class could be
        found for the final name.
        """
        # __exact is the default lookup if one isn't given.
        *transforms, lookup_name = lookups or ["exact"]
        for name in transforms:
            lhs = self.try_transform(lhs, name)
        # First try get_lookup() so that the lookup takes precedence if the lhs
        # supports both transform and lookup for the name.
        lookup_class = lhs.get_lookup(lookup_name)
        if not lookup_class:
            # A lookup wasn't found. Try to interpret the name as a transform
            # and do an Exact lookup against it.
            lhs = self.try_transform(lhs, lookup_name)
            lookup_name = "exact"
            lookup_class = lhs.get_lookup(lookup_name)
            if not lookup_class:
                return

        lookup = lookup_class(lhs, rhs)
        # Interpret '__exact=None' as the sql 'is NULL'; otherwise, reject all
        # uses of None as a query value unless the lookup supports it.
        if lookup.rhs is None and not lookup.can_use_none_as_rhs:
            if lookup_name not in ("exact", "iexact"):
                raise ValueError("Cannot use None as a query value")
            return lhs.get_lookup("isnull")(lhs, True)

        return lookup
1241
1242 def try_transform(self, lhs, name):
1243 """
1244 Helper method for build_lookup(). Try to fetch and initialize
1245 a transform for name parameter from lhs.
1246 """
1247 transform_class = lhs.get_transform(name)
1248 if transform_class:
1249 return transform_class(lhs)
1250 else:
1251 output_field = lhs.output_field.__class__
1252 suggested_lookups = difflib.get_close_matches(
1253 name, output_field.get_lookups()
1254 )
1255 if suggested_lookups:
1256 suggestion = ", perhaps you meant {}?".format(
1257 " or ".join(suggested_lookups)
1258 )
1259 else:
1260 suggestion = "."
1261 raise FieldError(
1262 f"Unsupported lookup '{name}' for {output_field.__name__} or join on the field not "
1263 f"permitted{suggestion}"
1264 )
1265
    def build_filter(
        self,
        filter_expr,
        branch_negated=False,
        current_negated=False,
        can_reuse=None,
        allow_joins=True,
        split_subq=True,
        reuse_with_filtered_relation=False,
        check_filterable=True,
        summarize=False,
    ):
        """
        Build a WhereNode for a single filter clause but don't add it
        to this Query. Query.add_q() will then add this filter to the where
        Node.

        The 'branch_negated' tells us if the current branch contains any
        negations. This will be used to determine if subqueries are needed.

        The 'current_negated' is used to determine if the current filter is
        negated or not and this will be used to determine if IS NULL filtering
        is needed.

        The difference between current_negated and branch_negated is that
        branch_negated is set on first negation, but current_negated is
        flipped for each negation.

        Note that add_filter will not do any negating itself, that is done
        upper in the code by add_q().

        The 'can_reuse' is a set of reusable joins for multijoins.

        If 'reuse_with_filtered_relation' is True, then only joins in can_reuse
        will be reused.

        The method will create a filter clause that can be added to the current
        query. However, if the filter isn't added to the query then the caller
        is responsible for unreffing the joins used.

        Return a (WhereNode, used_joins) pair; used_joins is empty when the
        clause requires outer joins (so nothing may vote them INNER).
        """
        if isinstance(filter_expr, dict):
            raise FieldError("Cannot parse keyword query as dict")
        if isinstance(filter_expr, Q):
            # A whole Q object: delegate to _add_q for recursive handling.
            return self._add_q(
                filter_expr,
                branch_negated=branch_negated,
                current_negated=current_negated,
                used_aliases=can_reuse,
                allow_joins=allow_joins,
                split_subq=split_subq,
                check_filterable=check_filterable,
                summarize=summarize,
            )
        if hasattr(filter_expr, "resolve_expression"):
            if not getattr(filter_expr, "conditional", False):
                raise TypeError("Cannot filter against a non-conditional expression.")
            condition = filter_expr.resolve_expression(
                self, allow_joins=allow_joins, summarize=summarize
            )
            if not isinstance(condition, Lookup):
                # Bare conditional expression: compare it against True.
                condition = self.build_lookup(["exact"], condition, True)
            return WhereNode([condition], connector=AND), []
        arg, value = filter_expr
        if not arg:
            raise FieldError(f"Cannot parse keyword query {arg!r}")
        lookups, parts, reffed_expression = self.solve_lookup_type(arg, summarize)

        if check_filterable:
            self.check_filterable(reffed_expression)

        if not allow_joins and len(parts) > 1:
            raise FieldError("Joined field references are not permitted in this query")

        # Record which joins value resolution adds so they can participate in
        # join promotion voting below.
        pre_joins = self.alias_refcount.copy()
        value = self.resolve_lookup_value(value, can_reuse, allow_joins)
        used_joins = {
            k for k, v in self.alias_refcount.items() if v > pre_joins.get(k, 0)
        }

        if check_filterable:
            self.check_filterable(value)

        if reffed_expression:
            # The lookup targets an annotation; no new joins are needed.
            condition = self.build_lookup(lookups, reffed_expression, value)
            return WhereNode([condition], connector=AND), []

        opts = self.get_meta()
        alias = self.get_initial_alias()
        allow_many = not branch_negated or not split_subq

        try:
            join_info = self.setup_joins(
                parts,
                opts,
                alias,
                can_reuse=can_reuse,
                allow_many=allow_many,
                reuse_with_filtered_relation=reuse_with_filtered_relation,
            )

            # Prevent iterator from being consumed by check_related_objects()
            if isinstance(value, Iterator):
                value = list(value)
            self.check_related_objects(join_info.final_field, value, join_info.opts)

            # split_exclude() needs to know which joins were generated for the
            # lookup parts
            self._lookup_joins = join_info.joins
        except MultiJoin as e:
            # A multi-valued relation in a negated branch must be handled
            # with a subquery rather than plain joins.
            return self.split_exclude(filter_expr, can_reuse, e.names_with_path)

        # Update used_joins before trimming since they are reused to determine
        # which joins could be later promoted to INNER.
        used_joins.update(join_info.joins)
        targets, alias, join_list = self.trim_joins(
            join_info.targets, join_info.joins, join_info.path
        )
        if can_reuse is not None:
            can_reuse.update(join_list)

        if join_info.final_field.is_relation:
            if len(targets) == 1:
                col = self._get_col(targets[0], join_info.final_field, alias)
            else:
                # Multi-column relation (e.g. composite FK targets).
                col = MultiColSource(
                    alias, targets, join_info.targets, join_info.final_field
                )
        else:
            col = self._get_col(targets[0], join_info.final_field, alias)

        condition = self.build_lookup(lookups, col, value)
        lookup_type = condition.lookup_name
        clause = WhereNode([condition], connector=AND)

        require_outer = (
            lookup_type == "isnull" and condition.rhs is True and not current_negated
        )
        if (
            current_negated
            and (lookup_type != "isnull" or condition.rhs is False)
            and condition.rhs is not None
        ):
            require_outer = True
            if lookup_type != "isnull":
                # The condition added here will be SQL like this:
                # NOT (col IS NOT NULL), where the first NOT is added in
                # upper layers of code. The reason for addition is that if col
                # is null, then col != someval will result in SQL "unknown"
                # which isn't the same as in Python. The Python None handling
                # is wanted, and it can be gotten by
                # (col IS NULL OR col != someval)
                # <=>
                # NOT (col IS NOT NULL AND col = someval).
                if (
                    self.is_nullable(targets[0])
                    or self.alias_map[join_list[-1]].join_type == LOUTER
                ):
                    lookup_class = targets[0].get_lookup("isnull")
                    col = self._get_col(targets[0], join_info.targets[0], alias)
                    clause.add(lookup_class(col, False), AND)
                # If someval is a nullable column, someval IS NOT NULL is
                # added.
                if isinstance(value, Col) and self.is_nullable(value.target):
                    lookup_class = value.target.get_lookup("isnull")
                    clause.add(lookup_class(value, False), AND)
        return clause, used_joins if not require_outer else ()
1432
    def add_filter(self, filter_lhs, filter_rhs):
        """Add a single (lookup, value) filter condition to the query."""
        self.add_q(Q((filter_lhs, filter_rhs)))
1435
    def add_q(self, q_object):
        """
        A preprocessor for the internal _add_q(). Responsible for doing final
        join promotion.
        """
        # For join promotion this case is doing an AND for the added q_object
        # and existing conditions. So, any existing inner join forces the join
        # type to remain inner. Existing outer joins can however be demoted.
        # (Consider case where rel_a is LOUTER and rel_a__col=1 is added - if
        # rel_a doesn't produce any rows, then the whole condition must fail.
        # So, demotion is OK.)
        existing_inner = {
            a for a in self.alias_map if self.alias_map[a].join_type == INNER
        }
        clause, _ = self._add_q(q_object, self.used_aliases)
        if clause:
            self.where.add(clause, AND)
        # Joins that were INNER before must stay INNER even if _add_q()
        # promoted them while building the clause.
        self.demote_joins(existing_inner)
1454
    def build_where(self, filter_expr):
        """Build a WhereNode for `filter_expr` without allowing any joins."""
        return self.build_filter(filter_expr, allow_joins=False)[0]
1457
    def clear_where(self):
        """Remove all filtering by resetting the WHERE clause to an empty node."""
        self.where = WhereNode()
1460
    def _add_q(
        self,
        q_object,
        used_aliases,
        branch_negated=False,
        current_negated=False,
        allow_joins=True,
        split_subq=True,
        check_filterable=True,
        summarize=False,
    ):
        """
        Add a Q-object to the current filter.

        Return a (WhereNode, needed_inner) pair, where needed_inner lists
        the joins that must remain INNER for the clause to be correct.
        """
        connector = q_object.connector
        # current_negated flips at every negation level; branch_negated
        # latches once any enclosing Q is negated.
        current_negated ^= q_object.negated
        branch_negated = branch_negated or q_object.negated
        target_clause = WhereNode(connector=connector, negated=q_object.negated)
        joinpromoter = JoinPromoter(
            q_object.connector, len(q_object.children), current_negated
        )
        for child in q_object.children:
            child_clause, needed_inner = self.build_filter(
                child,
                can_reuse=used_aliases,
                branch_negated=branch_negated,
                current_negated=current_negated,
                allow_joins=allow_joins,
                split_subq=split_subq,
                check_filterable=check_filterable,
                summarize=summarize,
            )
            # Each child votes on which joins must stay INNER.
            joinpromoter.add_votes(needed_inner)
            if child_clause:
                target_clause.add(child_clause, connector)
        needed_inner = joinpromoter.update_join_types(self)
        return target_clause, needed_inner
1496
    def build_filtered_relation_q(
        self, q_object, reuse, branch_negated=False, current_negated=False
    ):
        """Add a FilteredRelation object to the current filter."""
        connector = q_object.connector
        current_negated ^= q_object.negated
        branch_negated = branch_negated or q_object.negated
        target_clause = WhereNode(connector=connector, negated=q_object.negated)
        for child in q_object.children:
            if isinstance(child, Node):
                # Nested Q object: recurse to build its subtree.
                child_clause = self.build_filtered_relation_q(
                    child,
                    reuse=reuse,
                    branch_negated=branch_negated,
                    current_negated=current_negated,
                )
            else:
                # Leaf condition: restrict join reuse to the given aliases.
                child_clause, _ = self.build_filter(
                    child,
                    can_reuse=reuse,
                    branch_negated=branch_negated,
                    current_negated=current_negated,
                    allow_joins=True,
                    split_subq=False,
                    reuse_with_filtered_relation=True,
                )
            target_clause.add(child_clause, connector)
        return target_clause
1525
1526 def add_filtered_relation(self, filtered_relation, alias):
1527 filtered_relation.alias = alias
1528 lookups = dict(get_children_from_q(filtered_relation.condition))
1529 relation_lookup_parts, relation_field_parts, _ = self.solve_lookup_type(
1530 filtered_relation.relation_name
1531 )
1532 if relation_lookup_parts:
1533 raise ValueError(
1534 "FilteredRelation's relation_name cannot contain lookups "
1535 f"(got {filtered_relation.relation_name!r})."
1536 )
1537 for lookup in chain(lookups):
1538 lookup_parts, lookup_field_parts, _ = self.solve_lookup_type(lookup)
1539 shift = 2 if not lookup_parts else 1
1540 lookup_field_path = lookup_field_parts[:-shift]
1541 for idx, lookup_field_part in enumerate(lookup_field_path):
1542 if len(relation_field_parts) > idx:
1543 if relation_field_parts[idx] != lookup_field_part:
1544 raise ValueError(
1545 "FilteredRelation's condition doesn't support "
1546 f"relations outside the {filtered_relation.relation_name!r} (got {lookup!r})."
1547 )
1548 else:
1549 raise ValueError(
1550 "FilteredRelation's condition doesn't support nested "
1551 f"relations deeper than the relation_name (got {lookup!r} for "
1552 f"{filtered_relation.relation_name!r})."
1553 )
1554 self._filtered_relations[filtered_relation.alias] = filtered_relation
1555
    def names_to_path(self, names, opts, allow_many=True, fail_on_missing=False):
        """
        Walk the list of names and turns them into PathInfo tuples. A single
        name in 'names' can generate multiple PathInfos (m2m, for example).

        'names' is the path of names to travel, 'opts' is the model Options we
        start the name resolving from, 'allow_many' is as for setup_joins().
        If fail_on_missing is set to True, then a name that can't be resolved
        will generate a FieldError.

        Return a list of PathInfo tuples. In addition return the final field
        (the last used join field) and target (which is a field guaranteed to
        contain the same value as the final field). Finally, return those names
        that weren't found (which are likely transforms and the final lookup).
        """
        path, names_with_path = [], []
        for pos, name in enumerate(names):
            cur_names_with_path = (name, [])

            field = None
            filtered_relation = None
            try:
                if opts is None:
                    raise FieldDoesNotExist
                field = opts.get_field(name)
            except FieldDoesNotExist:
                if name in self.annotation_select:
                    # The name refers to an annotation rather than a field.
                    field = self.annotation_select[name].output_field
                elif name in self._filtered_relations and pos == 0:
                    filtered_relation = self._filtered_relations[name]
                    if LOOKUP_SEP in filtered_relation.relation_name:
                        # Multi-part relation_name: resolve all but the last
                        # step here; the final field handles the last step.
                        parts = filtered_relation.relation_name.split(LOOKUP_SEP)
                        filtered_relation_path, field, _, _ = self.names_to_path(
                            parts,
                            opts,
                            allow_many,
                            fail_on_missing,
                        )
                        path.extend(filtered_relation_path[:-1])
                    else:
                        field = opts.get_field(filtered_relation.relation_name)
            if field is not None:
                # Fields that contain one-to-many relations with a generic
                # model (like a GenericForeignKey) cannot generate reverse
                # relations and therefore cannot be used for reverse querying.
                if field.is_relation and not field.related_model:
                    raise FieldError(
                        f"Field {name!r} does not generate an automatic reverse "
                        "relation and therefore cannot be used for reverse "
                        "querying. If it is a GenericForeignKey, consider "
                        "adding a GenericRelation."
                    )
            else:
                # We didn't find the current field, so move position back
                # one step.
                pos -= 1
                if pos == -1 or fail_on_missing:
                    available = sorted(
                        [
                            *get_field_names_from_opts(opts),
                            *self.annotation_select,
                            *self._filtered_relations,
                        ]
                    )
                    raise FieldError(
                        "Cannot resolve keyword '{}' into field. "
                        "Choices are: {}".format(name, ", ".join(available))
                    )
                break

            if hasattr(field, "path_infos"):
                if filtered_relation:
                    pathinfos = field.get_path_info(filtered_relation)
                else:
                    pathinfos = field.path_infos
                if not allow_many:
                    # A multi-valued (m2m) step is forbidden: report the path
                    # walked so far via MultiJoin so the caller can split
                    # the query into a subquery instead.
                    for inner_pos, p in enumerate(pathinfos):
                        if p.m2m:
                            cur_names_with_path[1].extend(pathinfos[0 : inner_pos + 1])
                            names_with_path.append(cur_names_with_path)
                            raise MultiJoin(pos + 1, names_with_path)
                last = pathinfos[-1]
                path.extend(pathinfos)
                final_field = last.join_field
                opts = last.to_opts
                targets = last.target_fields
                cur_names_with_path[1].extend(pathinfos)
                names_with_path.append(cur_names_with_path)
            else:
                # Local non-relational field.
                final_field = field
                targets = (field,)
                if fail_on_missing and pos + 1 != len(names):
                    raise FieldError(
                        f"Cannot resolve keyword {names[pos + 1]!r} into field. Join on '{name}'"
                        " not permitted."
                    )
                break
        return path, final_field, targets, names[pos + 1 :]
1655
    def setup_joins(
        self,
        names,
        opts,
        alias,
        can_reuse=None,
        allow_many=True,
        reuse_with_filtered_relation=False,
    ):
        """
        Compute the necessary table joins for the passage through the fields
        given in 'names'. 'opts' is the Options class for the current model
        (which gives the table we are starting from), 'alias' is the alias for
        the table to start the joining from.

        The 'can_reuse' defines the reverse foreign key joins we can reuse. It
        can be None in which case all joins are reusable or a set of aliases
        that can be reused. Note that non-reverse foreign keys are always
        reusable when using setup_joins().

        The 'reuse_with_filtered_relation' can be used to force 'can_reuse'
        parameter and force the relation on the given connections.

        If 'allow_many' is False, then any reverse foreign key seen will
        generate a MultiJoin exception.

        Return the final field involved in the joins, the target field (used
        for any 'where' constraint), the final 'opts' value, the joins, the
        field path traveled to generate the joins, and a transform function
        that takes a field and alias and is equivalent to `field.get_col(alias)`
        in the simple case but wraps field transforms if they were included in
        names.

        The target field is the field containing the concrete value. Final
        field can be something different, for example foreign key pointing to
        that value. Final field is needed for example in some value
        conversions (convert 'obj' in fk__id=obj to pk val using the foreign
        key field for example).
        """
        joins = [alias]
        # The transform can't be applied yet, as joins must be trimmed later.
        # To avoid making every caller of this method look up transforms
        # directly, compute transforms here and create a partial that converts
        # fields to the appropriate wrapped version.

        def final_transformer(field, alias):
            if not self.alias_cols:
                alias = None
            return field.get_col(alias)

        # Try resolving all the names as fields first. If there's an error,
        # treat trailing names as lookups until a field can be resolved.
        last_field_exception = None
        for pivot in range(len(names), 0, -1):
            try:
                path, final_field, targets, rest = self.names_to_path(
                    names[:pivot],
                    opts,
                    allow_many,
                    fail_on_missing=True,
                )
            except FieldError as exc:
                if pivot == 1:
                    # The first item cannot be a lookup, so it's safe
                    # to raise the field error here.
                    raise
                else:
                    last_field_exception = exc
            else:
                # The transforms are the remaining items that couldn't be
                # resolved into fields.
                transforms = names[pivot:]
                break
        for name in transforms:

            def transform(field, alias, *, name, previous):
                try:
                    wrapped = previous(field, alias)
                    return self.try_transform(wrapped, name)
                except FieldError:
                    # FieldError is raised if the transform doesn't exist.
                    if isinstance(final_field, Field) and last_field_exception:
                        raise last_field_exception
                    else:
                        raise

            # Chain each transform on top of the previous transformer; the
            # keyword-only defaults bind the loop variables at definition time.
            final_transformer = functools.partial(
                transform, name=name, previous=final_transformer
            )
            final_transformer.has_transforms = True
        # Then, add the path to the query's joins. Note that we can't trim
        # joins at this stage - we will need the information about join type
        # of the trimmed joins.
        for join in path:
            if join.filtered_relation:
                filtered_relation = join.filtered_relation.clone()
                table_alias = filtered_relation.alias
            else:
                filtered_relation = None
                table_alias = None
            opts = join.to_opts
            if join.direct:
                nullable = self.is_nullable(join.join_field)
            else:
                # Reverse relations are always treated as nullable.
                nullable = True
            connection = self.join_class(
                opts.db_table,
                alias,
                table_alias,
                INNER,
                join.join_field,
                nullable,
                filtered_relation=filtered_relation,
            )
            reuse = can_reuse if join.m2m or reuse_with_filtered_relation else None
            alias = self.join(
                connection,
                reuse=reuse,
                reuse_with_filtered_relation=reuse_with_filtered_relation,
            )
            joins.append(alias)
            if filtered_relation:
                filtered_relation.path = joins[:]
        return JoinInfo(final_field, targets, opts, joins, path, final_transformer)
1780
1781 def trim_joins(self, targets, joins, path):
1782 """
1783 The 'target' parameter is the final field being joined to, 'joins'
1784 is the full list of join aliases. The 'path' contain the PathInfos
1785 used to create the joins.
1786
1787 Return the final target field and table alias and the new active
1788 joins.
1789
1790 Always trim any direct join if the target column is already in the
1791 previous table. Can't trim reverse joins as it's unknown if there's
1792 anything on the other side of the join.
1793 """
1794 joins = joins[:]
1795 for pos, info in enumerate(reversed(path)):
1796 if len(joins) == 1 or not info.direct:
1797 break
1798 if info.filtered_relation:
1799 break
1800 join_targets = {t.column for t in info.join_field.foreign_related_fields}
1801 cur_targets = {t.column for t in targets}
1802 if not cur_targets.issubset(join_targets):
1803 break
1804 targets_dict = {
1805 r[1].column: r[0]
1806 for r in info.join_field.related_fields
1807 if r[1].column in cur_targets
1808 }
1809 targets = tuple(targets_dict[t.column] for t in targets)
1810 self.unref_alias(joins.pop())
1811 return targets, joins[-1], joins
1812
1813 @classmethod
1814 def _gen_cols(cls, exprs, include_external=False, resolve_refs=True):
1815 for expr in exprs:
1816 if isinstance(expr, Col):
1817 yield expr
1818 elif include_external and callable(
1819 getattr(expr, "get_external_cols", None)
1820 ):
1821 yield from expr.get_external_cols()
1822 elif hasattr(expr, "get_source_expressions"):
1823 if not resolve_refs and isinstance(expr, Ref):
1824 continue
1825 yield from cls._gen_cols(
1826 expr.get_source_expressions(),
1827 include_external=include_external,
1828 resolve_refs=resolve_refs,
1829 )
1830
1831 @classmethod
1832 def _gen_col_aliases(cls, exprs):
1833 yield from (expr.alias for expr in cls._gen_cols(exprs))
1834
    def resolve_ref(self, name, allow_joins=True, reuse=None, summarize=False):
        """
        Resolve a (possibly LOOKUP_SEP-separated) name into an expression.

        The name may refer to an annotation (optionally followed by
        transforms) or to a model field path, in which case the necessary
        joins are set up. `reuse`, when not None, is a set of reusable join
        aliases and is updated with the joins actually used.
        """
        annotation = self.annotations.get(name)
        if annotation is not None:
            if not allow_joins:
                # Reject annotations that reference joined columns when
                # joins are not permitted in this context.
                for alias in self._gen_col_aliases([annotation]):
                    if isinstance(self.alias_map[alias], Join):
                        raise FieldError(
                            "Joined field references are not permitted in this query"
                        )
            if summarize:
                # Summarize currently means we are doing an aggregate() query
                # which is executed as a wrapped subquery if any of the
                # aggregate() elements reference an existing annotation. In
                # that case we need to return a Ref to the subquery's annotation.
                if name not in self.annotation_select:
                    raise FieldError(
                        f"Cannot aggregate over the '{name}' alias. Use annotate() "
                        "to promote it."
                    )
                return Ref(name, self.annotation_select[name])
            else:
                return annotation
        else:
            field_list = name.split(LOOKUP_SEP)
            annotation = self.annotations.get(field_list[0])
            if annotation is not None:
                # The first part names an annotation; the remaining parts
                # are transforms applied on top of it.
                for transform in field_list[1:]:
                    annotation = self.try_transform(annotation, transform)
                return annotation
            join_info = self.setup_joins(
                field_list, self.get_meta(), self.get_initial_alias(), can_reuse=reuse
            )
            targets, final_alias, join_list = self.trim_joins(
                join_info.targets, join_info.joins, join_info.path
            )
            if not allow_joins and len(join_list) > 1:
                raise FieldError(
                    "Joined field references are not permitted in this query"
                )
            if len(targets) > 1:
                raise FieldError(
                    "Referencing multicolumn fields with F() objects isn't supported"
                )
            # Verify that the last lookup in name is a field or a transform:
            # transform_function() raises FieldError if not.
            transform = join_info.transform_function(targets[0], final_alias)
            if reuse is not None:
                reuse.update(join_list)
            return transform
1884
    def split_exclude(self, filter_expr, can_reuse, names_with_path):
        """
        When doing an exclude against any kind of N-to-many relation, we need
        to use a subquery. This method constructs the nested query, given the
        original exclude filter (filter_expr) and the portion up to the first
        N-to-many relation field.

        For example, if the origin filter is ~Q(child__name='foo'), filter_expr
        is ('child__name', 'foo') and can_reuse is a set of joins usable for
        filters in the original query.

        We will turn this into equivalent of:
            WHERE NOT EXISTS(
                SELECT 1
                FROM child
                WHERE name = 'foo' AND child.parent_id = parent.id
                LIMIT 1
            )
        """
        # Generate the inner query.
        query = self.__class__(self.model)
        query._filtered_relations = self._filtered_relations
        filter_lhs, filter_rhs = filter_expr
        if isinstance(filter_rhs, OuterRef):
            # Wrap one level deeper so the reference resolves against this
            # query (the subquery's outer query), not the original one.
            filter_rhs = OuterRef(filter_rhs)
        elif isinstance(filter_rhs, F):
            # An F() in the exclude refers to the outer query's field when
            # seen from the subquery.
            filter_rhs = OuterRef(filter_rhs.name)
        query.add_filter(filter_lhs, filter_rhs)
        query.clear_ordering(force=True)
        # Try to have as simple as possible subquery -> trim leading joins from
        # the subquery.
        trimmed_prefix, contains_louter = query.trim_start(names_with_path)

        col = query.select[0]
        select_field = col.target
        alias = col.alias
        if alias in can_reuse:
            id_field = select_field.model._meta.get_field("id")
            # Need to add a restriction so that outer query's filters are in effect for
            # the subquery, too.
            query.bump_prefix(self)
            lookup_class = select_field.get_lookup("exact")
            # Note that the query.select[0].alias is different from alias
            # due to bump_prefix above.
            lookup = lookup_class(
                id_field.get_col(query.select[0].alias), id_field.get_col(alias)
            )
            query.where.add(lookup, AND)
            query.external_aliases[alias] = True

        # Correlate the subquery with the outer query via the trimmed prefix.
        lookup_class = select_field.get_lookup("exact")
        lookup = lookup_class(col, ResolvedOuterRef(trimmed_prefix))
        query.where.add(lookup, AND)
        condition, needed_inner = self.build_filter(Exists(query))

        if contains_louter:
            or_null_condition, _ = self.build_filter(
                (f"{trimmed_prefix}__isnull", True),
                current_negated=True,
                branch_negated=True,
                can_reuse=can_reuse,
            )
            condition.add(or_null_condition, OR)
            # Note that the end result will be:
            # (outercol NOT IN innerq AND outercol IS NOT NULL) OR outercol IS NULL.
            # This might look crazy but due to how IN works, this seems to be
            # correct. If the IS NOT NULL check is removed then outercol NOT
            # IN will return UNKNOWN. If the IS NULL check is removed, then if
            # outercol IS NULL we will not match the row.
        return condition, needed_inner
1955
1956 def set_empty(self):
1957 self.where.add(NothingNode(), AND)
1958 for query in self.combined_queries:
1959 query.set_empty()
1960
1961 def is_empty(self):
1962 return any(isinstance(c, NothingNode) for c in self.where.children)
1963
1964 def set_limits(self, low=None, high=None):
1965 """
1966 Adjust the limits on the rows retrieved. Use low/high to set these,
1967 as it makes it more Pythonic to read and write. When the SQL query is
1968 created, convert them to the appropriate offset and limit values.
1969
1970 Apply any limits passed in here to the existing constraints. Add low
1971 to the current low value and clamp both to any existing high value.
1972 """
1973 if high is not None:
1974 if self.high_mark is not None:
1975 self.high_mark = min(self.high_mark, self.low_mark + high)
1976 else:
1977 self.high_mark = self.low_mark + high
1978 if low is not None:
1979 if self.high_mark is not None:
1980 self.low_mark = min(self.high_mark, self.low_mark + low)
1981 else:
1982 self.low_mark = self.low_mark + low
1983
1984 if self.low_mark == self.high_mark:
1985 self.set_empty()
1986
1987 def clear_limits(self):
1988 """Clear any existing limits."""
1989 self.low_mark, self.high_mark = 0, None
1990
1991 @property
1992 def is_sliced(self):
1993 return self.low_mark != 0 or self.high_mark is not None
1994
1995 def has_limit_one(self):
1996 return self.high_mark is not None and (self.high_mark - self.low_mark) == 1
1997
1998 def can_filter(self):
1999 """
2000 Return True if adding filters to this instance is still possible.
2001
2002 Typically, this means no limits or offsets have been put on the results.
2003 """
2004 return not self.is_sliced
2005
2006 def clear_select_clause(self):
2007 """Remove all fields from SELECT clause."""
2008 self.select = ()
2009 self.default_cols = False
2010 self.select_related = False
2011 self.set_extra_mask(())
2012 self.set_annotation_mask(())
2013
2014 def clear_select_fields(self):
2015 """
2016 Clear the list of fields to select (but not extra_select columns).
2017 Some queryset types completely replace any existing list of select
2018 columns.
2019 """
2020 self.select = ()
2021 self.values_select = ()
2022
2023 def add_select_col(self, col, name):
2024 self.select += (col,)
2025 self.values_select += (name,)
2026
2027 def set_select(self, cols):
2028 self.default_cols = False
2029 self.select = tuple(cols)
2030
2031 def add_distinct_fields(self, *field_names):
2032 """
2033 Add and resolve the given fields to the query's "distinct on" clause.
2034 """
2035 self.distinct_fields = field_names
2036 self.distinct = True
2037
    def add_fields(self, field_names, allow_m2m=True):
        """
        Add the given (model) fields to the select set. Add the field names in
        the order specified.
        """
        alias = self.get_initial_alias()
        opts = self.get_meta()

        try:
            cols = []
            for name in field_names:
                # Join promotion note - we must not remove any rows here, so
                # if there is no existing joins, use outer join.
                join_info = self.setup_joins(
                    name.split(LOOKUP_SEP), opts, alias, allow_many=allow_m2m
                )
                targets, final_alias, joins = self.trim_joins(
                    join_info.targets,
                    join_info.joins,
                    join_info.path,
                )
                for target in targets:
                    cols.append(join_info.transform_function(target, final_alias))
            if cols:
                self.set_select(cols)
        except MultiJoin:
            # `name` is the field being processed when the join failed.
            raise FieldError(f"Invalid field name: '{name}'")
        except FieldError:
            if LOOKUP_SEP in name:
                # For lookups spanning over relationships, show the error
                # from the model on which the lookup failed.
                raise
            elif name in self.annotations:
                raise FieldError(
                    f"Cannot select the '{name}' alias. Use annotate() to promote it."
                )
            else:
                # For an unknown plain name, list all valid choices.
                names = sorted(
                    [
                        *get_field_names_from_opts(opts),
                        *self.extra,
                        *self.annotation_select,
                        *self._filtered_relations,
                    ]
                )
                raise FieldError(
                    "Cannot resolve keyword {!r} into field. Choices are: {}".format(
                        name, ", ".join(names)
                    )
                )
2088
    def add_ordering(self, *ordering):
        """
        Add items from the 'ordering' sequence to the query's "order by"
        clause. These items are either field names (not column names) --
        possibly with a direction prefix ('-' or '?') -- or OrderBy
        expressions.

        If 'ordering' is empty, clear all ordering from the query.
        """
        errors = []
        for item in ordering:
            if isinstance(item, str):
                if item == "?":
                    # Random ordering needs no validation.
                    continue
                item = item.removeprefix("-")
                if item in self.annotations:
                    continue
                if self.extra and item in self.extra:
                    continue
                # names_to_path() validates the lookup. A descriptive
                # FieldError will be raised if it's not.
                self.names_to_path(item.split(LOOKUP_SEP), self.model._meta)
            elif not hasattr(item, "resolve_expression"):
                errors.append(item)
            if getattr(item, "contains_aggregate", False):
                raise FieldError(
                    "Using an aggregate in order_by() without also including "
                    f"it in annotate() is not allowed: {item}"
                )
        if errors:
            raise FieldError(f"Invalid order_by arguments: {errors}")
        if ordering:
            self.order_by += ordering
        else:
            self.default_ordering = False
2124
2125 def clear_ordering(self, force=False, clear_default=True):
2126 """
2127 Remove any ordering settings if the current query allows it without
2128 side effects, set 'force' to True to clear the ordering regardless.
2129 If 'clear_default' is True, there will be no ordering in the resulting
2130 query (not even the model's default).
2131 """
2132 if not force and (
2133 self.is_sliced or self.distinct_fields or self.select_for_update
2134 ):
2135 return
2136 self.order_by = ()
2137 self.extra_order_by = ()
2138 if clear_default:
2139 self.default_ordering = False
2140
    def set_group_by(self, allow_aliases=True):
        """
        Expand the GROUP BY clause required by the query.

        This will usually be the set of all non-aggregate fields in the
        return data. If the database backend supports grouping by the
        primary key, and the query would be equivalent, the optimization
        will be made automatically.
        """
        if allow_aliases and self.values_select:
            # If grouping by aliases is allowed assign selected value aliases
            # by moving them to annotations.
            group_by_annotations = {}
            values_select = {}
            for alias, expr in zip(self.values_select, self.select):
                if isinstance(expr, Col):
                    values_select[alias] = expr
                else:
                    group_by_annotations[alias] = expr
            # Existing annotations take precedence over promoted aliases.
            self.annotations = {**group_by_annotations, **self.annotations}
            self.append_annotation_mask(group_by_annotations)
            self.select = tuple(values_select.values())
            self.values_select = tuple(values_select)
        group_by = list(self.select)
        for alias, annotation in self.annotation_select.items():
            if not (group_by_cols := annotation.get_group_by_cols()):
                # The expression contributes no grouping columns.
                continue
            if allow_aliases and not annotation.contains_aggregate:
                # Group by the alias rather than repeating the expression.
                group_by.append(Ref(alias, annotation))
            else:
                group_by.extend(group_by_cols)
        self.group_by = tuple(group_by)
2173
2174 def add_select_related(self, fields):
2175 """
2176 Set up the select_related data structure so that we only select
2177 certain related models (as opposed to all models, when
2178 self.select_related=True).
2179 """
2180 if isinstance(self.select_related, bool):
2181 field_dict = {}
2182 else:
2183 field_dict = self.select_related
2184 for field in fields:
2185 d = field_dict
2186 for part in field.split(LOOKUP_SEP):
2187 d = d.setdefault(part, {})
2188 self.select_related = field_dict
2189
2190 def add_extra(self, select, select_params, where, params, tables, order_by):
2191 """
2192 Add data to the various extra_* attributes for user-created additions
2193 to the query.
2194 """
2195 if select:
2196 # We need to pair any placeholder markers in the 'select'
2197 # dictionary with their parameters in 'select_params' so that
2198 # subsequent updates to the select dictionary also adjust the
2199 # parameters appropriately.
2200 select_pairs = {}
2201 if select_params:
2202 param_iter = iter(select_params)
2203 else:
2204 param_iter = iter([])
2205 for name, entry in select.items():
2206 self.check_alias(name)
2207 entry = str(entry)
2208 entry_params = []
2209 pos = entry.find("%s")
2210 while pos != -1:
2211 if pos == 0 or entry[pos - 1] != "%":
2212 entry_params.append(next(param_iter))
2213 pos = entry.find("%s", pos + 2)
2214 select_pairs[name] = (entry, entry_params)
2215 self.extra.update(select_pairs)
2216 if where or params:
2217 self.where.add(ExtraWhere(where, params), AND)
2218 if tables:
2219 self.extra_tables += tuple(tables)
2220 if order_by:
2221 self.extra_order_by = order_by
2222
2223 def clear_deferred_loading(self):
2224 """Remove any fields from the deferred loading set."""
2225 self.deferred_loading = (frozenset(), True)
2226
2227 def add_deferred_loading(self, field_names):
2228 """
2229 Add the given list of model field names to the set of fields to
2230 exclude from loading from the database when automatic column selection
2231 is done. Add the new field names to any existing field names that
2232 are deferred (or removed from any existing field names that are marked
2233 as the only ones for immediate loading).
2234 """
2235 # Fields on related models are stored in the literal double-underscore
2236 # format, so that we can use a set datastructure. We do the foo__bar
2237 # splitting and handling when computing the SQL column names (as part of
2238 # get_columns()).
2239 existing, defer = self.deferred_loading
2240 if defer:
2241 # Add to existing deferred names.
2242 self.deferred_loading = existing.union(field_names), True
2243 else:
2244 # Remove names from the set of any existing "immediate load" names.
2245 if new_existing := existing.difference(field_names):
2246 self.deferred_loading = new_existing, False
2247 else:
2248 self.clear_deferred_loading()
2249 if new_only := set(field_names).difference(existing):
2250 self.deferred_loading = new_only, True
2251
2252 def add_immediate_loading(self, field_names):
2253 """
2254 Add the given list of model field names to the set of fields to
2255 retrieve when the SQL is executed ("immediate loading" fields). The
2256 field names replace any existing immediate loading field names. If
2257 there are field names already specified for deferred loading, remove
2258 those names from the new field_names before storing the new names
2259 for immediate loading. (That is, immediate loading overrides any
2260 existing immediate values, but respects existing deferrals.)
2261 """
2262 existing, defer = self.deferred_loading
2263 field_names = set(field_names)
2264
2265 if defer:
2266 # Remove any existing deferred names from the current set before
2267 # setting the new names.
2268 self.deferred_loading = field_names.difference(existing), False
2269 else:
2270 # Replace any existing "immediate load" field names.
2271 self.deferred_loading = frozenset(field_names), False
2272
2273 def set_annotation_mask(self, names):
2274 """Set the mask of annotations that will be returned by the SELECT."""
2275 if names is None:
2276 self.annotation_select_mask = None
2277 else:
2278 self.annotation_select_mask = set(names)
2279 self._annotation_select_cache = None
2280
2281 def append_annotation_mask(self, names):
2282 if self.annotation_select_mask is not None:
2283 self.set_annotation_mask(self.annotation_select_mask.union(names))
2284
2285 def set_extra_mask(self, names):
2286 """
2287 Set the mask of extra select items that will be returned by SELECT.
2288 Don't remove them from the Query since they might be used later.
2289 """
2290 if names is None:
2291 self.extra_select_mask = None
2292 else:
2293 self.extra_select_mask = set(names)
2294 self._extra_select_cache = None
2295
    def set_values(self, fields):
        """
        Configure the query for a values()/values_list() style selection of
        `fields` (all concrete fields when empty), dispatching each name to
        the field/extra/annotation select lists and rebuilding GROUP BY so
        it stays consistent with the new SELECT clause.
        """
        self.select_related = False
        self.clear_deferred_loading()
        self.clear_select_fields()
        self.has_select_fields = True

        if fields:
            field_names = []
            extra_names = []
            annotation_names = []
            if not self.extra and not self.annotations:
                # Shortcut - if there are no extra or annotations, then
                # the values() clause must be just field names.
                field_names = list(fields)
            else:
                self.default_cols = False
                for f in fields:
                    if f in self.extra_select:
                        extra_names.append(f)
                    elif f in self.annotation_select:
                        annotation_names.append(f)
                    else:
                        field_names.append(f)
            self.set_extra_mask(extra_names)
            self.set_annotation_mask(annotation_names)
            selected = frozenset(field_names + extra_names + annotation_names)
        else:
            field_names = [f.attname for f in self.model._meta.concrete_fields]
            selected = frozenset(field_names)
        # Selected annotations must be known before setting the GROUP BY
        # clause.
        if self.group_by is True:
            self.add_fields(
                (f.attname for f in self.model._meta.concrete_fields), False
            )
            # Disable GROUP BY aliases to avoid orphaning references to the
            # SELECT clause which is about to be cleared.
            self.set_group_by(allow_aliases=False)
            self.clear_select_fields()
        elif self.group_by:
            # Resolve GROUP BY annotation references if they are not part of
            # the selected fields anymore.
            group_by = []
            for expr in self.group_by:
                if isinstance(expr, Ref) and expr.refs not in selected:
                    expr = self.annotations[expr.refs]
                group_by.append(expr)
            self.group_by = tuple(group_by)

        self.values_select = tuple(field_names)
        self.add_fields(field_names, True)
2347
2348 @property
2349 def annotation_select(self):
2350 """
2351 Return the dictionary of aggregate columns that are not masked and
2352 should be used in the SELECT clause. Cache this result for performance.
2353 """
2354 if self._annotation_select_cache is not None:
2355 return self._annotation_select_cache
2356 elif not self.annotations:
2357 return {}
2358 elif self.annotation_select_mask is not None:
2359 self._annotation_select_cache = {
2360 k: v
2361 for k, v in self.annotations.items()
2362 if k in self.annotation_select_mask
2363 }
2364 return self._annotation_select_cache
2365 else:
2366 return self.annotations
2367
2368 @property
2369 def extra_select(self):
2370 if self._extra_select_cache is not None:
2371 return self._extra_select_cache
2372 if not self.extra:
2373 return {}
2374 elif self.extra_select_mask is not None:
2375 self._extra_select_cache = {
2376 k: v for k, v in self.extra.items() if k in self.extra_select_mask
2377 }
2378 return self._extra_select_cache
2379 else:
2380 return self.extra
2381
    def trim_start(self, names_with_path):
        """
        Trim joins from the start of the join path. The candidates for trim
        are the PathInfos in names_with_path structure that are m2m joins.

        Also set the select column so the start matches the join.

        This method is meant to be used for generating the subquery joins &
        cols in split_exclude().

        Return a lookup usable for doing outerq.filter(lookup=self) and a
        boolean indicating if the joins in the prefix contain a LEFT OUTER join.
        """
        all_paths = []
        for _, paths in names_with_path:
            all_paths.extend(paths)
        contains_louter = False
        # Trim and operate only on tables that were generated for
        # the lookup part of the query. That is, avoid trimming
        # joins generated for F() expressions.
        lookup_tables = [
            t for t in self.alias_map if t in self._lookup_joins or t == self.base_table
        ]
        # Walk forward until the first m2m join; everything before it can be
        # expressed as a filter prefix on the outer query. Note that
        # `trimmed_paths` and `path` are used after the loop ends.
        for trimmed_paths, path in enumerate(all_paths):
            if path.m2m:
                break
            if self.alias_map[lookup_tables[trimmed_paths + 1]].join_type == LOUTER:
                contains_louter = True
            alias = lookup_tables[trimmed_paths]
            self.unref_alias(alias)
        # The path.join_field is a Rel, lets get the other side's field
        join_field = path.join_field.field
        # Build the filter prefix.
        paths_in_prefix = trimmed_paths
        trimmed_prefix = []
        for name, path in names_with_path:
            if paths_in_prefix - len(path) < 0:
                break
            trimmed_prefix.append(name)
            paths_in_prefix -= len(path)
        trimmed_prefix.append(join_field.foreign_related_fields[0].name)
        trimmed_prefix = LOOKUP_SEP.join(trimmed_prefix)
        # Lets still see if we can trim the first join from the inner query
        # (that is, self). We can't do this for:
        # - LEFT JOINs because we would miss those rows that have nothing on
        #   the outer side,
        # - INNER JOINs from filtered relations because we would miss their
        #   filters.
        first_join = self.alias_map[lookup_tables[trimmed_paths + 1]]
        if first_join.join_type != LOUTER and not first_join.filtered_relation:
            select_fields = [r[0] for r in join_field.related_fields]
            select_alias = lookup_tables[trimmed_paths + 1]
            self.unref_alias(lookup_tables[trimmed_paths])
            extra_restriction = join_field.get_extra_restriction(
                None, lookup_tables[trimmed_paths + 1]
            )
            if extra_restriction:
                self.where.add(extra_restriction, AND)
        else:
            # TODO: It might be possible to trim more joins from the start of the
            # inner query if it happens to have a longer join chain containing the
            # values in select_fields. Lets punt this one for now.
            select_fields = [r[1] for r in join_field.related_fields]
            select_alias = lookup_tables[trimmed_paths]
        # The found starting point is likely a join_class instead of a
        # base_table_class reference. But the first entry in the query's FROM
        # clause must not be a JOIN.
        for table in self.alias_map:
            if self.alias_refcount[table] > 0:
                self.alias_map[table] = self.base_table_class(
                    self.alias_map[table].table_name,
                    table,
                )
                break
        self.set_select([f.get_col(select_alias) for f in select_fields])
        return trimmed_prefix, contains_louter
2458
2459 def is_nullable(self, field):
2460 """Check if the given field should be treated as nullable."""
2461 # QuerySet does not have knowledge of which connection is going to be
2462 # used. For the single-database setup we always reference the default
2463 # connection here.
2464 return field.allow_null
2465
2466
def get_order_dir(field, default="ASC"):
    """
    Return the field name and direction for an order specification. For
    example, '-foo' is returned as ('foo', 'DESC').

    The 'default' param is used to indicate which way no prefix (or a '+'
    prefix) should sort. The '-' prefix always sorts the opposite way.
    """
    same_dir, opposite_dir = ORDER_DIR[default]
    if field[0] == "-":
        return field[1:], opposite_dir
    return field, same_dir
2479
2480
class JoinPromoter:
    """
    A class to abstract away join promotion problems for complex filter
    conditions.
    """

    def __init__(self, connector, num_children, negated):
        self.connector = connector
        self.negated = negated
        # Negation flips the connector's effect on join promotion:
        # NOT (a AND b) behaves like (NOT a) OR (NOT b), and vice versa.
        if negated:
            self.effective_connector = OR if connector == AND else AND
        else:
            self.effective_connector = connector
        self.num_children = num_children
        # Maps of table alias to how many times it is seen as required for
        # inner and/or outer joins.
        self.votes = Counter()

    def __repr__(self):
        return (
            f"{self.__class__.__qualname__}(connector={self.connector!r}, "
            f"num_children={self.num_children!r}, negated={self.negated!r})"
        )

    def add_votes(self, votes):
        """Record one vote per table alias in the given iterable."""
        self.votes.update(votes)

    def update_join_types(self, query):
        """
        Change join types so that the generated query is as efficient as
        possible, but still correct. So, change as many joins as possible
        to INNER, but don't make OUTER joins INNER if that could remove
        results from the query.
        """
        to_promote = set()
        to_demote = set()
        # effective_connector means NOT (a AND b) is treated like (a OR b).
        for table, votes in self.votes.items():
            # OR case: an INNER join not required by every child could
            # remove valid rows - e.g. in rel_a__col=1 | rel_b__col=2, a
            # missing/null rel_a row must not kill a match on rel_b. So
            # promote the join to LOUTER (it may still be demoted below).
            if self.effective_connector == OR and votes < self.num_children:
                to_promote.add(table)
            # AND case: a single child needing the joined row forces INNER
            # (NULL = anything is never true, so the filter can't match
            # without a row). OR case: when *every* child voted for the
            # join, a missing row fails all branches, so INNER is safe
            # there as well.
            if self.effective_connector == AND or (
                self.effective_connector == OR and votes == self.num_children
            ):
                to_demote.add(table)
            # A join can be promoted by an OR clause and then demoted by a
            # later AND clause, e.g. (rel_a__col=1 | rel_b__col=2) &
            # rel_a__col__gte=0: rel_a ends up INNER again, which is safe -
            # if rel_a produces no row the gte clause is false and the
            # whole condition fails anyway. Swapping the clauses, or
            # replacing the gte with an OR over rel_a only, is equally
            # safe to demote.
        query.promote_joins(to_promote)
        query.demote_joins(to_demote)
        return to_demote