# Source code for core.orm.cache

""" Provides a simple and mostly transparent way of defining orm-cached
properties on the application class.

For example::

    from onegov.core import Framework
    from onegov.core.orm import orm_cached

    class App(Framework):

        @orm_cached(policy='on-table-change:users')
        def users(self):
            # ... fetch users from database

Properties defined in this way are accessible through the instance::

    app.users

If there are any changes to the users table, the cache is removed. Since the
cache is usually a shared redis instance, this works for multiple processes.

"""

import inspect

from functools import wraps
from libres.db.models import ORMBase
from onegov.core.orm.utils import maybe_merge
from sqlalchemy.orm.query import Query
from time import time


from typing import cast, overload, Any, Generic, TypeVar, TYPE_CHECKING
if TYPE_CHECKING:
    # Everything in this block only exists for static type checkers, none
    # of it is imported or evaluated at runtime.
    from collections.abc import Callable, Iterator
    from morepath.request import Request
    from onegov.core.framework import Framework
    from typing import Protocol, Self

    from . import Base
    from .session_manager import SessionManager
    from ..cache import RedisCacheRegion

    # NOTE: it would be more correct to make OrmCacheApp the first
    #       argument, but this gets a bit complicated for actually
    #       using the decorator
    Creator = Callable[[Any], '_T']

    # a policy is either a string such as 'on-table-change:<table>' or a
    # callable which receives the changed object and returns a boolean
    CachePolicy = str | Callable[[Base], bool]

    _T_co = TypeVar('_T_co', covariant=True)
    _FrameworkT = TypeVar(
        '_FrameworkT',
        bound=Framework,
        contravariant=True
    )

    class _RequestCached(Protocol[_FrameworkT, _T_co]):
        """ Describes the attribute produced by ``request_cached``: a
        property on the class and the cached value on the instance.

        """

        @overload
        def __get__(
            self,
            instance: None,
            owner: type[_FrameworkT]
        ) -> property: ...

        @overload
        def __get__(
            self,
            instance: _FrameworkT,
            owner: type[_FrameworkT]
        ) -> _T_co: ...

    class _OrmCacheDecorator(Protocol):
        """ Describes the decorator returned by ``orm_cached``: creators
        returning a query yield a descriptor of a tuple of rows, all other
        creators yield a descriptor of their return type.

        """

        @overload
        def __call__(
            self,
            fn: 'Creator[Query[_T]]'
        ) -> 'OrmCacheDescriptor[tuple[_T, ...]]': ...

        @overload
        def __call__(
            self,
            fn: 'Creator[_T]'
        ) -> 'OrmCacheDescriptor[_T]': ...

    class _HasApp(Protocol):
        """ Any object that exposes the application through ``app``. """

        @property
        def app(self) -> 'OrmCacheApp': ...
# The type of the value produced by a creator / stored by a descriptor.
_T = TypeVar('_T')

# The row type of a query returned by a creator.
_QT = TypeVar('_QT')

# Sentinel used as the default when looking up the in-memory schema cache,
# so that a cached ``None`` can be told apart from a missing entry.
unset = object()
class OrmCacheApp:
    """ Integrates the orm cache handling into the application
    (i.e. :class:`onegov.core.framework.Framework`).

    In addition, the application needs to call :meth:`setup_orm_cache` inside
    of :meth:`onegov.server.application.Application.set_application_id` to
    enable the cache eviction mechanism.

    """

    if TYPE_CHECKING:
        # forward declare the attributes we need from Framework
        session_manager: SessionManager
        schema: str

        @property
        def cache(self) -> RedisCacheRegion: ...

        request_cache: dict[str, Any]
        request_class: type[Request]
        schema_cache: dict[str, Any]

    def configure_orm_cache(self, **cfg: Any) -> None:
        """ Initialises the setup flag, keeping its value if it has
        already been set on this instance.

        """
        already_setup = getattr(self, 'is_orm_cache_setup', False)
        self.is_orm_cache_setup = already_setup

    def setup_orm_cache(self) -> None:
        """ Sets up the event handlers for the change-detection. """

        assert not self.is_orm_cache_setup

        for descriptor in self.orm_cache_descriptors:
            # every signal gets its own freshly bound handler
            for signal_name in ('on_insert', 'on_update', 'on_delete'):
                signal = getattr(self.session_manager, signal_name)
                signal.connect(
                    self.descriptor_bound_orm_change_handler(descriptor),
                    weak=False
                )

        self.is_orm_cache_setup = True

    def descriptor_bound_orm_change_handler(
        self,
        descriptor: 'OrmCacheDescriptor[Any]'
    ) -> 'Callable[[str, Base], None]':
        """ Listens to changes to the database and evicts the cache if the
        policy demands it. Available policies:

        * policy='on-table-change:table': clears the cache if there's a change
          on the given table

        * policy=lambda obj: ...: clears the cache if the given policy function
          returns true (it receives the object which has changed)

        """

        def handle_orm_change(schema: str, obj: 'Base') -> None:
            policy = descriptor.cache_policy

            if callable(policy):
                dirty = policy(obj)
            elif policy.startswith('on-table-change'):
                tablename = policy.split(':')[-1]
                dirty = obj.__class__.__tablename__ == tablename
            else:
                raise NotImplementedError()

            if not dirty:
                return

            # Two circumstances ensure that only the cache of the current
            # schema is evicted:
            #
            # 1. There's a current schema set when the event is fired.
            # 2. The current cache is bound to the current schema.
            #
            # Still, trust but verify:
            assert self.schema == schema

            for cache_key in descriptor.used_cache_keys:
                self.cache.delete(cache_key)

                # NOTE: We also need to delete the timestamp, so we don't
                #       get stuck on an old timestamp forever, the
                #       timestamp is created using get_or_create in order
                #       to avoid data races in cache invalidation
                self.cache.delete(f'{cache_key}_ts')

                # drop the in-memory copies as well (if there are any)
                self.schema_cache.pop(cache_key, None)
                self.request_cache.pop(cache_key, None)

        return handle_orm_change

    @property
    def orm_cache_descriptors(self) -> 'Iterator[OrmCacheDescriptor[Any]]':
        """ Yields all orm cache descriptors installed on the class. """

        yield from (
            member
            for _name, member in inspect.getmembers(self.__class__)
            if isinstance(member, OrmCacheDescriptor)
        )

        # some descriptors are installed on the linked request instead
        yield from (
            member
            for _name, member in inspect.getmembers(self.request_class)
            if isinstance(member, OrmCacheDescriptor)
        )
class OrmCacheDescriptor(Generic[_T]):
    """ The descriptor implements the protocol for fetching the objects
    either from cache or creating them using the ``creator``.

    You are not allowed to store ORM objects in this cache, since it leads
    to unpredictable results when attempting to merge the restored objects
    with the current session.

    """

    #: A set of cache keys that have been accessed
    used_cache_keys: set[str]

    @overload
    def __init__(
        self: 'OrmCacheDescriptor[tuple[_QT, ...]]',
        cache_policy: 'CachePolicy',
        creator: 'Creator[Query[_QT]]',
        by_role: bool = False
    ): ...

    @overload
    def __init__(
        self: 'OrmCacheDescriptor[_T]',
        cache_policy: 'CachePolicy',
        creator: 'Creator[_T]',
        by_role: bool = False
    ): ...

    def __init__(
        self,
        cache_policy: 'CachePolicy',
        creator: 'Creator[Query[Any]] | Creator[_T]',
        by_role: bool = False
    ):
        """ Stores the policy and creator of the cached property.

        :param cache_policy:
            Either a string like ``'on-table-change:<table>'`` or a callable
            receiving the changed object and returning whether the cache
            should be evicted.

        :param creator:
            The function producing the value to cache. Its ``__qualname__``
            is used as the prefix of the cache key.

        :param by_role:
            If true, the role found on ``identity.role`` of the accessed
            object is appended to the cache key.

        """
        self.cache_policy = cache_policy
        self.cache_key_prefix = creator.__qualname__
        self.used_cache_keys = set()
        self.creator = creator
        self.by_role = by_role

    def cache_key(self, obj: 'OrmCacheApp | _HasApp') -> str:
        """ Returns the cache key to use for the given object.

        Without ``by_role`` this is just the prefix. With ``by_role`` the
        value of ``obj.identity.role`` is appended (``None`` if either
        attribute is missing).

        """
        if not self.by_role:
            return self.cache_key_prefix

        role = getattr(getattr(obj, 'identity', None), 'role', None)
        return f'{self.cache_key_prefix}-{role}'

    def assert_no_orm_objects(self, obj: object, depth: int = 0) -> None:
        """ Ensures the object contains no ORM objects.

        The object itself is always checked. Mappings (keys and values) and
        other iterables are additionally checked one level deep.

        :raises AssertionError:
            If the object or one of its direct children is an ORM object.

        """

        # FIXME: circular import
        from onegov.core.orm import Base

        assert not isinstance(obj, (Base, ORMBase)), (
            'You are not allowed to cache ORM objects with orm_cached.'
        )

        # for performance reasons we only check the first level of nesting
        # we also run into recursion depth issues if two orm_cached properties
        # rely on one another
        if depth >= 1:
            return

        if isinstance(obj, str):
            # strings only ever contain more strings, no need to descend
            pass
        elif hasattr(obj, 'items'):
            # we need to check keys as well as values
            for key, value in obj.items():
                self.assert_no_orm_objects(key, depth + 1)
                self.assert_no_orm_objects(value, depth + 1)
        elif hasattr(obj, '__iter__'):
            # recurse into iterables, checking each element (not the
            # container again, which would skip the elements entirely)
            for child in obj:
                self.assert_no_orm_objects(child, depth + 1)

    def create(self, instance: 'OrmCacheApp | _HasApp') -> _T:
        """ Uses the creator to load the object to be cached.

        Since the return value of the creator might not be something we want
        to cache, this function will turn some return values into something
        more useful (e.g. queries are completely fetched).

        :raises AssertionError:
            If the result contains ORM objects.

        """

        result = self.creator(instance)

        if isinstance(result, Query):
            # fetch all rows, a query itself is not a cacheable value
            result = cast('_T', tuple(result))

        self.assert_no_orm_objects(result)
        return result

    def load(self, instance: 'OrmCacheApp | _HasApp') -> _T:
        """ Loads the object from the database or cache.

        The lookup goes through the request cache, then the in-memory
        schema cache (validated by a timestamp stored in the shared cache)
        and finally the shared cache, which calls :meth:`create` on a miss.

        """

        if isinstance(instance, OrmCacheApp):
            app = instance
        else:
            app = instance.app

        # before accessing any cached values we need to make sure that all
        # pending changes are properly flushed -> this leads to some extra cpu
        # cycles spent but eliminates the chance of accessing a stale entry
        # after a change
        session = app.session_manager.session()
        if session.dirty:
            session.flush()

        cache_key = self.cache_key(instance)
        self.used_cache_keys.add(cache_key)

        # we use a tertiary request cache for even more lookup speed and to
        # make sure that inside a request we always get the exact same instance
        # (otherwise we don't see changes reflected)
        if cache_key in app.request_cache:
            return app.request_cache[cache_key]

        # we separately store when the redis cache was last populated
        # so we can detect when we need to invalidate the memory cache
        # dogpile has its own time metadata, but we can't retrieve it
        # without paying the deserialization overhead, defeating the
        # entire purpose of this secondary cache
        ts_key = f'{cache_key}_ts'

        # we use a secondary in-memory cache for more lookup speed
        ts, obj = app.schema_cache.get(cache_key, (float('-Inf'), unset))
        if obj is unset or ts != app.cache.get(key=ts_key):
            # NOTE: Ideally we would create these values as a pair
            #       but then we would have to start circumventing
            #       most of dogpile's API, at which point we may
            #       as well just use raw Redis, which would give us
            #       even better possibilities.
            #       A data race isn't really harmful here, but it is
            #       kind of inefficient that we're sending two separate
            #       Redis commands, when one would suffice.
            obj = app.cache.get_or_create(
                key=cache_key,
                creator=lambda: self.create(instance)
            )
            ts = app.cache.get_or_create(
                key=ts_key,
                # NOTE: There are some corner-cases where time can lead
                #       to incorrect cache-invalidation, but we can't use
                #       monotonic, since that will not lead to a meaningful
                #       comparison between different processes, dogpile
                #       also uses time for its own cache invalidation, so
                #       we should be fine
                creator=time
            )
            app.schema_cache[cache_key] = (ts, obj)

        app.request_cache[cache_key] = obj

        return obj

    # NOTE: Technically this descriptor should only work on
    #       applications or objects with applications that derive
    #       from OrmCacheApp, however since we heavily use mixins
    #       that restriction becomes tedious, once Intersection
    #       is a thing, we can restrict this once again
    @overload
    def __get__(
        self,
        instance: None,
        owner: type[Any]
    ) -> 'Self': ...

    @overload
    def __get__(
        self,
        instance: Any,
        owner: type[Any]
    ) -> _T: ...

    def __get__(
        self,
        instance: Any | None,
        owner: type[Any]
    ) -> 'Self | _T':
        """ Handles the object/cache access.

        Accessed on the class, the descriptor itself is returned. Accessed
        on an instance, the cached value is loaded.

        """

        if instance is None:
            return self

        return self.load(instance)
def orm_cached(
    policy: 'CachePolicy',
    by_role: bool = False
) -> '_OrmCacheDecorator':
    """ The decorator used to set up the cache descriptor.

    The decorated function becomes the creator of an
    :class:`OrmCacheDescriptor` configured with the given ``policy`` and
    ``by_role`` values.

    See the :mod:`onegov.core.orm.cache` docs for usage.

    """

    @overload
    def orm_cache_decorator(
        fn: 'Creator[Query[_T]]'
    ) -> 'OrmCacheDescriptor[tuple[_T, ...]]': ...

    @overload
    def orm_cache_decorator(
        fn: 'Creator[_T]'
    ) -> 'OrmCacheDescriptor[_T]': ...

    def orm_cache_decorator(fn: 'Creator[Any]') -> 'OrmCacheDescriptor[Any]':
        descriptor = OrmCacheDescriptor(
            cache_policy=policy,
            creator=fn,
            by_role=by_role
        )
        return descriptor

    return orm_cache_decorator
def request_cached(
    appmethod: 'Callable[[_FrameworkT], _T]'
) -> '_RequestCached[_FrameworkT, _T]':
    """ This is like a request scoped :func:`orm_cached`.

    This may store ORM objects in contrast to :func:`orm_cached`, which
    should only be used to store other kinds of objects.

    The decorated method is turned into a property, whose value is kept in
    ``request_cache`` under the qualified name of the method.

    """

    cache_key = appmethod.__qualname__

    @wraps(appmethod)
    def wrapper(self: '_FrameworkT') -> '_T':
        session = self.session()

        # before accessing any cached values we need to make sure that all
        # pending changes are properly flushed -> this leads to some extra cpu
        # cycles spent but eliminates the chance of accessing a stale entry
        # after a change
        if session.dirty:
            session.flush()

        if cache_key not in self.request_cache:
            # first access: compute the value and remember it
            value = appmethod(self)
            self.request_cache[cache_key] = value
            return value

        # later accesses: hand the cached value to maybe_merge together
        # with the session before returning it
        return maybe_merge(self.session(), self.request_cache[cache_key])

    return property(wrapper)