# Source: netdeploy.net/netdeploy/lib/python3.11/site-packages/dns/btree.py (851 lines, 30 KiB, Python)

# Copyright (C) Dnspython Contributors, see LICENSE for text of ISC license
"""
A BTree in the style of Cormen, Leiserson, and Rivest's "Algorithms" book, with
copy-on-write node updates, cursors, and optional space optimization for mostly-in-order
insertion.
"""
from collections.abc import MutableMapping, MutableSet
from typing import Any, Callable, Generic, Optional, Tuple, TypeVar, cast
DEFAULT_T = 127

KT = TypeVar("KT")  # key type returned by Element.key()


class Element(Generic[KT]):
    """Base class for every item stored in the BTree."""

    def key(self) -> KT:
        """Return this element's key.

        Subclasses must override this method; the key type has to support
        comparison.
        """
        raise NotImplementedError  # pragma: no cover


ET = TypeVar("ET", bound=Element)  # element type stored in the BTree
def _MIN(t: int) -> int:
    """Return the fewest keys a non-root node may hold for branching
    parameter ``t``.
    """
    min_keys = t - 1
    return min_keys
def _MAX(t: int) -> int:
    """Return the most keys any node may hold for branching parameter ``t``."""
    max_keys = 2 * t - 1
    return max_keys
class _Creator:
    """Unique identity token for the BTree that created a node.

    A dedicated object is used instead of a reference to the BTree itself to
    avoid circularity that would complicate GC.
    """

    def __str__(self):  # pragma: no cover
        # Render our identity as lowercase hexadecimal.
        return format(id(self), "x")
class _Node(Generic[KT, ET]):
    """A node (leaf or internal) of the BTree.

    A node keeps its elements in ``elts`` (binary-searched by key in
    ``search_in_node()``) and, when it is not a leaf, its child nodes in
    ``children``.  Each node remembers the ``creator`` it was made for;
    ``maybe_cow()`` compares that against the caller's creator to decide whether
    the node must be cloned before it is modified.
    """

    # Only these attributes exist on a node; there is no per-instance __dict__.
    __slots__ = ["t", "creator", "is_leaf", "elts", "children"]

    def __init__(self, t: int, creator: _Creator, is_leaf: bool):
        """Create an empty node.

        *t* is the BTree's ``t`` parameter (must be >= 3), *creator* identifies
        the BTree that owns this node, and *is_leaf* says whether the node is a
        leaf.
        """
        assert t >= 3
        self.t = t
        self.creator = creator
        self.is_leaf = is_leaf
        self.elts: list[ET] = []
        self.children: list[_Node[KT, ET]] = []

    def is_maximal(self) -> bool:
        """Does this node have the maximal number of keys?"""
        assert len(self.elts) <= _MAX(self.t)
        return len(self.elts) == _MAX(self.t)

    def is_minimal(self) -> bool:
        """Does this node have the minimal number of keys?

        Asserts that the node holds at least the minimum number of keys.
        """
        assert len(self.elts) >= _MIN(self.t)
        return len(self.elts) == _MIN(self.t)

    def search_in_node(self, key: KT) -> tuple[int, bool]:
        """Get the index of the ``Element`` matching ``key`` or the index of its
        least successor.

        Returns a tuple of the index and an ``equal`` boolean that is ``True``
        if and only if the key was found.  When every element in the node is
        less than ``key`` the returned index is ``len(self.elts)``.
        """
        l = len(self.elts)
        if l > 0 and key > self.elts[l - 1].key():
            # This is optimizing near in-order insertion: a key greater than our
            # last element needs no binary search.
            return l, False
        # Binary search over [l, r]; i tracks the smallest index seen so far whose
        # key is >= the search key (initially "past the end").
        l = 0
        i = len(self.elts)
        r = i - 1
        equal = False
        while l <= r:
            m = (l + r) // 2
            k = self.elts[m].key()
            if key == k:
                i = m
                equal = True
                break
            elif key < k:
                i = m
                r = m - 1
            else:
                l = m + 1
        return i, equal

    def maybe_cow_child(self, index: int) -> "_Node[KT, ET]":
        """Return the child at *index*, cloning it first if it was created by a
        different creator than ours.

        When a clone is made it replaces the original in ``self.children``.
        Must not be called on a leaf.
        """
        assert not self.is_leaf
        child = self.children[index]
        cloned = child.maybe_cow(self.creator)
        if cloned:
            self.children[index] = cloned
            return cloned
        else:
            return child

    def _get_node(self, key: KT) -> Tuple[Optional["_Node[KT, ET]"], int]:
        """Get the node associated with key and its index, doing
        copy-on-write if we have to descend.

        Returns a tuple of the node and the index, or the tuple ``(None, 0)``
        if the key was not found.
        """
        i, equal = self.search_in_node(key)
        if equal:
            return (self, i)
        elif self.is_leaf:
            return (None, 0)
        else:
            child = self.maybe_cow_child(i)
            return child._get_node(key)

    def get(self, key: KT) -> ET | None:
        """Get the element associated with *key* or return ``None``.

        This is a read-only lookup; no nodes are cloned while descending.
        """
        i, equal = self.search_in_node(key)
        if equal:
            return self.elts[i]
        elif self.is_leaf:
            return None
        else:
            return self.children[i].get(key)

    def optimize_in_order_insertion(self, index: int) -> None:
        """Try to minimize the number of Nodes in a BTree where the insertion
        is done in-order or close to it, by having the left sibling of the child
        at *index* steal as much as it can from its right sibling.

        If we don't do this, then an in-order insertion will produce a BTree
        where most of the nodes are minimal.
        """
        if index == 0:
            # There is no left sibling to fill.
            return
        left = self.children[index - 1]
        if len(left.elts) == _MAX(self.t):
            # Already full; check before cloning so we don't copy needlessly.
            return
        left = self.maybe_cow_child(index - 1)
        while len(left.elts) < _MAX(self.t):
            if not left.try_right_steal(self, index - 1):
                break

    def insert_nonfull(self, element: ET, in_order: bool) -> ET | None:
        """Insert *element* into the subtree rooted at this node, which must not
        be maximal.

        If *in_order* is ``True``, ``optimize_in_order_insertion()`` is run after
        inserting into a child.

        Returns the element that was replaced if one with the same key already
        existed, or ``None`` if the element was added.
        """
        assert not self.is_maximal()
        while True:
            key = element.key()
            i, equal = self.search_in_node(key)
            if equal:
                # replace
                old = self.elts[i]
                self.elts[i] = element
                return old
            elif self.is_leaf:
                self.elts.insert(i, element)
                return None
            else:
                child = self.maybe_cow_child(i)
                if child.is_maximal():
                    self.adopt(*child.split())
                    # Splitting might result in our target moving to us, so
                    # search again.
                    continue
                oelt = child.insert_nonfull(element, in_order)
                if in_order:
                    self.optimize_in_order_insertion(i)
                return oelt

    def split(self) -> tuple["_Node[KT, ET]", ET, "_Node[KT, ET]"]:
        """Split a maximal node into two minimal ones and a central element.

        Returns ``(self, middle, right)`` where ``self`` keeps the lower elements
        (and children), ``right`` is a new node holding the upper ones, and
        ``middle`` is the element that was between them.
        """
        assert self.is_maximal()
        right = self.__class__(self.t, self.creator, self.is_leaf)
        right.elts = list(self.elts[_MIN(self.t) + 1 :])
        middle = self.elts[_MIN(self.t)]
        self.elts = list(self.elts[: _MIN(self.t)])
        if not self.is_leaf:
            right.children = list(self.children[_MIN(self.t) + 1 :])
            self.children = list(self.children[: _MIN(self.t) + 1])
        return self, middle, right

    def try_left_steal(self, parent: "_Node[KT, ET]", index: int) -> bool:
        """Try to steal from this Node's left sibling for balancing purposes.

        *index* is this node's position in ``parent.children``.  The separating
        element in the parent moves down to the front of this node, and the left
        sibling's last element moves up to replace it.

        Returns ``True`` if the theft was successful, or ``False`` if not.
        """
        if index != 0:
            left = parent.children[index - 1]
            if not left.is_minimal():
                # Only clone the sibling once we know we will modify it.
                left = parent.maybe_cow_child(index - 1)
                elt = parent.elts[index - 1]
                parent.elts[index - 1] = left.elts.pop()
                self.elts.insert(0, elt)
                if not left.is_leaf:
                    assert not self.is_leaf
                    # The sibling's last child comes along as our first child.
                    child = left.children.pop()
                    self.children.insert(0, child)
                return True
        return False

    def try_right_steal(self, parent: "_Node[KT, ET]", index: int) -> bool:
        """Try to steal from this Node's right sibling for balancing purposes.

        *index* is this node's position in ``parent.children``.  The separating
        element in the parent moves down to the end of this node, and the right
        sibling's first element moves up to replace it.

        Returns ``True`` if the theft was successful, or ``False`` if not.
        """
        if index + 1 < len(parent.children):
            right = parent.children[index + 1]
            if not right.is_minimal():
                # Only clone the sibling once we know we will modify it.
                right = parent.maybe_cow_child(index + 1)
                elt = parent.elts[index]
                parent.elts[index] = right.elts.pop(0)
                self.elts.append(elt)
                if not right.is_leaf:
                    assert not self.is_leaf
                    # The sibling's first child comes along as our last child.
                    child = right.children.pop(0)
                    self.children.append(child)
                return True
        return False

    def adopt(self, left: "_Node[KT, ET]", middle: ET, right: "_Node[KT, ET]") -> None:
        """Adopt left, middle, and right into our Node (which must not be maximal,
        and which must not be a leaf).  In the case where we are not the new root,
        then the left child must already be in the Node."""
        assert not self.is_maximal()
        assert not self.is_leaf
        key = middle.key()
        i, equal = self.search_in_node(key)
        assert not equal
        self.elts.insert(i, middle)
        if len(self.children) == 0:
            # We are the new root
            self.children = [left, right]
        else:
            # left is already our child at position i; right goes just after it.
            assert self.children[i] == left
            self.children.insert(i + 1, right)

    def merge(self, parent: "_Node[KT, ET]", index: int) -> None:
        """Merge the parent's separating element and this node's right sibling
        into this node.

        *index* is this node's position in ``parent.children``.  The right
        sibling and the separating element are removed from the parent.
        """
        right = parent.children.pop(index + 1)
        self.elts.append(parent.elts.pop(index))
        self.elts.extend(right.elts)
        if not self.is_leaf:
            self.children.extend(right.children)

    def minimum(self) -> ET:
        """The least element in this subtree."""
        if self.is_leaf:
            return self.elts[0]
        else:
            return self.children[0].minimum()

    def maximum(self) -> ET:
        """The greatest element in this subtree."""
        if self.is_leaf:
            return self.elts[-1]
        else:
            return self.children[-1].maximum()

    def balance(self, parent: "_Node[KT, ET]", index: int) -> None:
        """This Node is minimal, and we want to make it non-minimal so we can delete.
        We try to steal from our siblings, and if that doesn't work we will merge
        with one of them.

        *index* is this node's position in ``parent.children``."""
        assert not parent.is_leaf
        if self.try_left_steal(parent, index):
            return
        if self.try_right_steal(parent, index):
            return
        # Stealing didn't work, so both siblings must be minimal.
        if index == 0:
            # We are the left-most node so merge with our right sibling.
            self.merge(parent, index)
        else:
            # Have our left sibling merge with us.  This lets us only have "merge right"
            # code.
            left = parent.maybe_cow_child(index - 1)
            left.merge(parent, index - 1)

    def delete(
        self, key: KT, parent: Optional["_Node[KT, ET]"], exact: ET | None
    ) -> ET | None:
        """Delete an element matching *key* if it exists.  If *exact* is not ``None``
        then it must be an exact match with that element.  The Node must not be
        minimal unless it is the root.

        Returns the deleted element, or ``None`` if there was no match and
        *exact* is ``None``.

        Raises ``ValueError`` if *exact* is not ``None`` and either no element
        matches *key* or the matching element is not the *exact* object.
        """
        assert parent is None or not self.is_minimal()
        i, equal = self.search_in_node(key)
        # Set when the match is in an internal node and we delete its least
        # successor instead.
        original_key = None
        if equal:
            # Note we use "is" here as we meant "exactly this object".
            if exact is not None and self.elts[i] is not exact:
                raise ValueError("exact delete did not match existing elt")
            if self.is_leaf:
                return self.elts.pop(i)
            # Note we need to ensure exact is None going forward as we've
            # already checked exactness and are about to change our target key
            # to the least successor.
            exact = None
            original_key = key
            least_successor = self.children[i + 1].minimum()
            key = least_successor.key()
            i = i + 1
        if self.is_leaf:
            # No match
            if exact is not None:
                raise ValueError("exact delete had no match")
            return None
        # recursively delete in the appropriate child
        child = self.maybe_cow_child(i)
        if child.is_minimal():
            child.balance(self, i)
            # Things may have moved.
            i, equal = self.search_in_node(key)
            assert not equal
            child = self.children[i]
            assert not child.is_minimal()
        elt = child.delete(key, self, exact)
        if original_key is not None:
            # elt is the least successor we removed from below.  Put it where the
            # originally requested element now lives (balancing may have moved it)
            # and return the requested element instead.
            node, i = self._get_node(original_key)
            assert node is not None
            assert elt is not None
            oelt = node.elts[i]
            node.elts[i] = elt
            elt = oelt
        return elt

    def visit_in_order(self, visit: Callable[[ET], None]) -> None:
        """Call *visit* on all of the elements in order."""
        for i, elt in enumerate(self.elts):
            if not self.is_leaf:
                # Everything in child i sorts before element i.
                self.children[i].visit_in_order(visit)
            visit(elt)
        if not self.is_leaf:
            # The last child holds everything after our last element.
            self.children[-1].visit_in_order(visit)

    def _visit_preorder_by_node(self, visit: Callable[["_Node[KT, ET]"], None]) -> None:
        """Visit nodes in preorder.  This method is only used for testing."""
        visit(self)
        if not self.is_leaf:
            for child in self.children:
                child._visit_preorder_by_node(visit)

    def maybe_cow(self, creator: _Creator) -> Optional["_Node[KT, ET]"]:
        """Return a clone of this Node if it was not created by *creator*, or ``None``
        otherwise (i.e. copy for copy-on-write if we haven't already copied it)."""
        if self.creator is not creator:
            return self.clone(creator)
        else:
            return None

    def clone(self, creator: _Creator) -> "_Node[KT, ET]":
        """Make a shallow-copy duplicate of this node.

        The clone is owned by *creator* and gets new ``elts`` and ``children``
        lists that refer to the same elements and child nodes.
        """
        cloned = self.__class__(self.t, creator, self.is_leaf)
        cloned.elts.extend(self.elts)
        if not self.is_leaf:
            cloned.children.extend(self.children)
        return cloned

    def __str__(self):  # pragma: no cover
        # Debug rendering: our id, our creator, our elements, and (for internal
        # nodes) the ids of our children.
        if not self.is_leaf:
            children = " " + " ".join([f"{id(c):x}" for c in self.children])
        else:
            children = ""
        return f"{id(self):x} {self.creator} {self.elts}{children}"
class Cursor(Generic[KT, ET]):
    """A seekable cursor for a BTree.

    If you are going to use a cursor on a mutable BTree, you should use it
    in a ``with`` block so that any mutations of the BTree automatically park
    the cursor.
    """

    def __init__(self, btree: "BTree[KT, ET]"):
        """Create a cursor for *btree*, positioned on the left boundary."""
        self.btree = btree
        self.current_node: _Node | None = None
        # The current index is the element index within the current node, or
        # if there is no current node then it is 0 on the left boundary and 1
        # on the right boundary.
        self.current_index: int = 0
        # True when the last element returned came from an internal node, so the
        # next step in the same direction must first descend into a child.
        self.recurse = False
        # Direction of the most recent movement (or the seek() hint).
        self.increasing = True
        # Stack of (node, index) pairs recording the path from the root.
        self.parents: list[tuple[_Node, int]] = []
        # Parking state: whether we are parked, the key to re-seek to when
        # unparking, and whether that key has already been returned to the caller.
        self.parked = False
        self.parking_key: KT | None = None
        self.parking_key_read = False

    def _seek_least(self) -> None:
        # seek to the least value in the subtree beneath the current index of the
        # current node
        assert self.current_node is not None
        while not self.current_node.is_leaf:
            self.parents.append((self.current_node, self.current_index))
            self.current_node = self.current_node.children[self.current_index]
            assert self.current_node is not None
            self.current_index = 0

    def _seek_greatest(self) -> None:
        # seek to the greatest value in the subtree beneath the current index of the
        # current node
        assert self.current_node is not None
        while not self.current_node.is_leaf:
            self.parents.append((self.current_node, self.current_index))
            self.current_node = self.current_node.children[self.current_index]
            assert self.current_node is not None
            self.current_index = len(self.current_node.elts)

    def park(self):
        """Park the cursor.

        A cursor must be "parked" before mutating the BTree to avoid undefined behavior.
        Cursors created in a ``with`` block register with their BTree and will park
        automatically.  Note that a parked cursor may not observe some changes made when
        it is parked; for example a cursor being iterated with next() will not see items
        inserted before its current position.
        """
        if not self.parked:
            self.parked = True

    def _maybe_unpark(self):
        """If the cursor is parked, re-seek to the parking key (if there is one)
        and clear the parked state."""
        if self.parked:
            if self.parking_key is not None:
                # remember our increasing hint, as seeking might change it
                increasing = self.increasing
                if self.parking_key_read:
                    # We've already returned the parking key, so we want to be before it
                    # if decreasing and after it if increasing.
                    before = not self.increasing
                else:
                    # We haven't returned the parking key, so we've parked right
                    # after seeking or are on a boundary.  Either way, the before
                    # hint we want is the value of self.increasing.
                    before = self.increasing
                self.seek(self.parking_key, before)
                self.increasing = increasing  # might have been altered by seek()
            self.parked = False
            self.parking_key = None

    def prev(self) -> ET | None:
        """Get the previous element, or return None if on the left boundary."""
        self._maybe_unpark()
        self.parking_key = None
        if self.current_node is None:
            # on a boundary
            if self.current_index == 0:
                # left boundary, there is no prev
                return None
            else:
                assert self.current_index == 1
                # right boundary; seek to the actual boundary
                # so we can do a prev()
                self.current_node = self.btree.root
                self.current_index = len(self.btree.root.elts)
                self._seek_greatest()
        while True:
            if self.recurse:
                if not self.increasing:
                    # We only want to recurse if we are continuing in the decreasing
                    # direction.
                    self._seek_greatest()
                self.recurse = False
            self.increasing = False
            self.current_index -= 1
            if self.current_index >= 0:
                elt = self.current_node.elts[self.current_index]
                if not self.current_node.is_leaf:
                    # The next step must visit the subtree next to this element.
                    self.recurse = True
                self.parking_key = elt.key()
                self.parking_key_read = True
                return elt
            else:
                # Ran off the left end of this node; resume in the parent, or
                # land on the left boundary if there is none.
                if len(self.parents) > 0:
                    self.current_node, self.current_index = self.parents.pop()
                else:
                    self.current_node = None
                    self.current_index = 0
                    return None

    def next(self) -> ET | None:
        """Get the next element, or return None if on the right boundary."""
        self._maybe_unpark()
        self.parking_key = None
        if self.current_node is None:
            # on a boundary
            if self.current_index == 1:
                # right boundary, there is no next
                return None
            else:
                assert self.current_index == 0
                # left boundary; seek to the actual boundary
                # so we can do a next()
                self.current_node = self.btree.root
                self.current_index = 0
                self._seek_least()
        while True:
            if self.recurse:
                if self.increasing:
                    # We only want to recurse if we are continuing in the increasing
                    # direction.
                    self._seek_least()
                self.recurse = False
            self.increasing = True
            if self.current_index < len(self.current_node.elts):
                elt = self.current_node.elts[self.current_index]
                self.current_index += 1
                if not self.current_node.is_leaf:
                    # The next step must visit the subtree next to this element.
                    self.recurse = True
                self.parking_key = elt.key()
                self.parking_key_read = True
                return elt
            else:
                # Ran off the right end of this node; resume in the parent, or
                # land on the right boundary if there is none.
                if len(self.parents) > 0:
                    self.current_node, self.current_index = self.parents.pop()
                else:
                    self.current_node = None
                    self.current_index = 1
                    return None

    def _adjust_for_before(self, before: bool, i: int) -> None:
        """Set the current index to *i* if *before* is true, else to ``i + 1``."""
        if before:
            self.current_index = i
        else:
            self.current_index = i + 1

    def seek(self, key: KT, before: bool = True) -> None:
        """Seek to the specified key.

        If *before* is ``True`` (the default) then the cursor is positioned just
        before *key* if it exists, or before its least successor if it doesn't.  A
        subsequent next() will retrieve this value.  If *before* is ``False``, then
        the cursor is positioned just after *key* if it exists, or its greatest
        predecessor if it doesn't.  A subsequent prev() will return this value.
        """
        self.current_node = self.btree.root
        assert self.current_node is not None
        self.recurse = False
        self.parents = []
        self.increasing = before
        self.parked = False
        # Remember the key so that a later park/unpark can come back here.
        self.parking_key = key
        self.parking_key_read = False
        while not self.current_node.is_leaf:
            i, equal = self.current_node.search_in_node(key)
            if equal:
                # Found in an internal node: descend to the adjacent leaf position
                # on the requested side of the key.
                self._adjust_for_before(before, i)
                if before:
                    self._seek_greatest()
                else:
                    self._seek_least()
                return
            self.parents.append((self.current_node, i))
            self.current_node = self.current_node.children[i]
            assert self.current_node is not None
        i, equal = self.current_node.search_in_node(key)
        if equal:
            self._adjust_for_before(before, i)
        else:
            self.current_index = i

    def seek_first(self) -> None:
        """Seek to the left boundary (i.e. just before the least element).

        A subsequent next() will return the least element if the BTree isn't empty."""
        self.current_node = None
        self.current_index = 0
        self.recurse = False
        self.increasing = True
        self.parents = []
        self.parked = False
        self.parking_key = None

    def seek_last(self) -> None:
        """Seek to the right boundary (i.e. just after the greatest element).

        A subsequent prev() will return the greatest element if the BTree isn't empty.
        """
        self.current_node = None
        self.current_index = 1
        self.recurse = False
        self.increasing = False
        self.parents = []
        self.parked = False
        self.parking_key = None

    def __enter__(self):
        """Register with the BTree so that mutations park this cursor."""
        self.btree.register_cursor(self)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Deregister from the BTree; exceptions are not suppressed."""
        self.btree.deregister_cursor(self)
        return False
class Immutable(Exception):
    """Raised when a mutation is attempted on an immutable BTree."""
class BTree(Generic[KT, ET]):
    """An in-memory BTree with copy-on-write and cursors."""

    def __init__(self, *, t: int = DEFAULT_T, original: Optional["BTree"] = None):
        """Create a BTree.

        If *original* is not ``None``, then the BTree is shallow-cloned from
        *original* using copy-on-write (and *t* is taken from *original*).
        Otherwise a new, empty BTree with the specified *t* value is created.

        Raises ``ValueError`` if *original* has not been made immutable, or if a
        new tree is requested with *t* less than 3.

        The BTree is not thread-safe.
        """
        # A dedicated creator object is used rather than a reference to ourselves
        # so that old btrees are not kept from being garbage collected.
        self.creator = _Creator()
        self._immutable = False
        self.t: int
        self.root: _Node
        self.size: int
        self.cursors: set[Cursor] = set()
        if original is None:
            if t < 3:
                raise ValueError("t must be >= 3")
            self.t = t
            self.root = _Node(t, self.creator, True)
            self.size = 0
        elif original._immutable:
            # Share the original's nodes; they are cloned lazily on first write.
            self.t, self.root, self.size = original.t, original.root, original.size
        else:
            raise ValueError("original BTree is not immutable")

    def make_immutable(self):
        """Make the BTree immutable.

        Attempts to alter the BTree after making it immutable will raise an
        Immutable exception.  This operation cannot be undone.
        """
        self._immutable = True

    def _check_mutable_and_park(self) -> None:
        """Raise ``Immutable`` if the tree is frozen, else park every registered
        cursor ahead of a mutation."""
        if self._immutable:
            raise Immutable
        for registered in self.cursors:
            registered.park()

    # The mutators are called insert_element() and delete_key() instead of
    # insert() and delete() so that BTreeDict can be a proper MutableMapping and
    # supply the rest of the standard mapping API.

    def insert_element(self, elt: ET, in_order: bool = False) -> ET | None:
        """Insert the element into the BTree.

        If *in_order* is ``True``, then extra work will be done to make left siblings
        full, which optimizes storage space when the elements are inserted in-order
        or close to it.

        Returns the previously existing element at the element's key or ``None``.
        """
        self._check_mutable_and_park()
        # Take our own copy of the root if it still belongs to another tree.
        self.root = self.root.maybe_cow(self.creator) or self.root
        if self.root.is_maximal():
            # Grow the tree by one level: the full root is split under a new root.
            full_root = self.root
            self.root = _Node(self.t, self.creator, False)
            self.root.adopt(*full_root.split())
        replaced = self.root.insert_nonfull(elt, in_order)
        if replaced is None:
            # Nothing was replaced, so the tree gained an element.
            self.size += 1
        return replaced

    def get_element(self, key: KT) -> ET | None:
        """Get the element matching *key* from the BTree, or return ``None`` if it
        does not exist.
        """
        return self.root.get(key)

    def _delete(self, key: KT, exact: ET | None) -> ET | None:
        """Shared implementation of ``delete_key()`` and ``delete_exact()``.

        Returns the removed element, or ``None`` if nothing was removed.
        """
        self._check_mutable_and_park()
        # Take our own copy of the root if it still belongs to another tree.
        self.root = self.root.maybe_cow(self.creator) or self.root
        removed = self.root.delete(key, None, exact)
        if removed is None:
            return None
        self.size -= 1
        root = self.root
        if not root.elts and not root.is_leaf:
            # The root is now empty but has a child, so collapse this root level
            # and make the child the new root.
            assert len(root.children) == 1
            self.root = root.children[0]
        return removed

    def delete_key(self, key: KT) -> ET | None:
        """Delete the element matching *key* from the BTree.

        Returns the matching element or ``None`` if it does not exist.
        """
        return self._delete(key, None)

    def delete_exact(self, element: ET) -> ET | None:
        """Delete *element* from the BTree.

        Returns the deleted element.  The underlying node deletion raises
        ``ValueError`` if no element with this key exists or if the stored
        element is not this exact object.
        """
        removed = self._delete(element.key(), element)
        assert removed is element
        return removed

    def __len__(self):
        return self.size

    def visit_in_order(self, visit: Callable[[ET], None]) -> None:
        """Call *visit*(element) on all elements in the tree in sorted order."""
        self.root.visit_in_order(visit)

    def _visit_preorder_by_node(self, visit: Callable[[_Node], None]) -> None:
        """Call *visit*(node) on every node in preorder."""
        self.root._visit_preorder_by_node(visit)

    def cursor(self) -> Cursor[KT, ET]:
        """Create a cursor."""
        return Cursor(self)

    def register_cursor(self, cursor: Cursor) -> None:
        """Register a cursor for the automatic parking service."""
        self.cursors.add(cursor)

    def deregister_cursor(self, cursor: Cursor) -> None:
        """Deregister a cursor from the automatic parking service."""
        self.cursors.discard(cursor)

    def __copy__(self):
        return self.__class__(original=self)

    def __iter__(self):
        # Iterate keys with a registered cursor so that mutations made during
        # iteration park it.
        with self.cursor() as cursor:
            while (elt := cursor.next()) is not None:
                yield elt.key()
VT = TypeVar("VT")  # value type held by a BTreeDict


class KV(Element, Generic[KT, VT]):
    """Key/value pair; the element type a ``BTreeDict`` stores."""

    def __init__(self, key: KT, value: VT):
        self._key, self._value = key, value

    def key(self) -> KT:
        """Return the key of this pair."""
        return self._key

    def value(self) -> VT:
        """Return the value of this pair."""
        return self._value

    def __str__(self):  # pragma: no cover
        return f"KV({self._key}, {self._value})"

    def __repr__(self):  # pragma: no cover
        return f"KV({self._key}, {self._value})"
class BTreeDict(Generic[KT, VT], BTree[KT, KV[KT, VT]], MutableMapping[KT, VT]):
    """A MutableMapping implemented with a BTree.

    Unlike a normal Python dict, the BTreeDict may be mutated while iterating.
    """

    def __init__(
        self,
        *,
        t: int = DEFAULT_T,
        original: BTree | None = None,
        in_order: bool = False,
    ):
        """Create a BTreeDict.

        *t* and *original* are passed through to ``BTree.__init__()``.
        *in_order* is the hint handed to ``insert_element()`` on every item
        assignment.
        """
        super().__init__(t=t, original=original)
        self.in_order = in_order

    def __getitem__(self, key: KT) -> VT:
        """Return the value stored for *key*.

        Raises ``KeyError(key)`` if the key is not present.
        """
        elt = self.get_element(key)
        if elt is None:
            # Carry the missing key in the exception, as built-in mappings do.
            raise KeyError(key)
        return cast(KV, elt).value()

    def __setitem__(self, key: KT, value: VT) -> None:
        """Store *value* under *key*, replacing any existing entry."""
        self.insert_element(KV(key, value), self.in_order)

    def __delitem__(self, key: KT) -> None:
        """Remove the entry for *key*.

        Raises ``KeyError(key)`` if the key is not present.
        """
        if self.delete_key(key) is None:
            # Carry the missing key in the exception, as built-in mappings do.
            raise KeyError(key)
class Member(Element, Generic[KT]):
    """Key-only element; the element type a ``BTreeSet`` stores."""

    def __init__(self, key: KT):
        # The member is its own key; nothing else is stored.
        self._key = key

    def key(self) -> KT:
        """Return the value this member wraps."""
        return self._key
class BTreeSet(BTree, Generic[KT], MutableSet[KT]):
    """A MutableSet implemented with a BTree.

    Unlike a normal Python set, the BTreeSet may be mutated while iterating.
    """

    def __init__(
        self,
        *,
        t: int = DEFAULT_T,
        original: BTree | None = None,
        in_order: bool = False,
    ):
        """Create a BTreeSet.

        *t* and *original* are passed through to ``BTree.__init__()``.
        *in_order* is the hint handed to ``insert_element()`` by ``add()``.
        """
        super().__init__(t=t, original=original)
        self.in_order = in_order

    def __contains__(self, key: Any) -> bool:
        """Return whether an element with *key* is in the set."""
        found = self.get_element(key)
        return found is not None

    def add(self, value: KT) -> None:
        """Add *value* to the set."""
        self.insert_element(Member(value), self.in_order)

    def discard(self, value: KT) -> None:
        """Remove *value* from the set if it is present."""
        self.delete_key(value)