# Source code for core.custom.custom_json

""" Sigh. Here we go again, *another* json implementation with support for:

- date
- datetime
- time

Because nobody else does all of these. And if they do (like standardjson), they
don't support decoding...

"""
from __future__ import annotations

import datetime
import isodate
import json
import re
import types

from decimal import Decimal
from itertools import chain
from onegov.core.cache import instance_lru_cache


from typing import overload, Any, ClassVar, Generic, TypeVar, TYPE_CHECKING
if TYPE_CHECKING:
    from _typeshed import SupportsRead, SupportsWrite
    from collections.abc import Callable, Collection, Iterator, Iterable
    from typing import TypeAlias

    from onegov.core.types import JSON_ro, JSONObject_ro

# union of the concrete serializer implementations below; kept as a
# string so it may reference classes defined later in the module
AnySerializer: TypeAlias = ( 'PrefixSerializer[_T] | DictionarySerializer[_T]')

# type variable for the Python object being (de)serialized
_T = TypeVar('_T')

# type variable for the JSON representation a serializer produces
_ST = TypeVar('_ST', bound='JSON_ro')
class Serializer(Generic[_T, _ST]):
    """ Abstract base providing a way to encode all objects of a given
    class (or its subclasses) to and from json.

    Concrete subclasses implement :meth:`encode` and :meth:`decode`.
    """

    def __init__(self, target: type[_T]):
        # a serializer is bound to a class, never to an instance
        assert isinstance(target, type), 'expects a class'
        self.target = target

    def encode(self, obj: _T) -> _ST:
        raise NotImplementedError

    def decode(self, value: _ST) -> _T:
        raise NotImplementedError
class PrefixSerializer(Serializer[_T, str]):
    """ Serializes objects to a string with a prefix.

    Resulting json values take the form of __prefix__@<value>, where <value>
    is the encoded value and __prefix__@ is the prefix that is used to
    differentiate between normal strings and encoded strings.

    Note that the part after the prefix is user-supplied and possibly
    unsafe. So something like an 'eval' should be out of the question!

    """

    # template shared by the encoder and the prefix-length computation
    prefix_format = '__{}__@{}'

    # pattern the decoder uses to recognize encoded strings - its character
    # class must agree with `prefix_characters` below
    prefix_expression = re.compile(r'__(?P<prefix>[a-zA-Z]+)__@')

    # characters permitted in a prefix; previously this also allowed '-',
    # which `prefix_expression` can never match, so such a prefix would
    # have encoded values that silently failed to round-trip
    prefix_characters = re.compile(r'[a-zA-Z]+')

    def __init__(
        self,
        target: type[_T],
        prefix: str,
        encode: Callable[[_T], str],
        decode: Callable[[str], _T],
    ):
        """
        :param target: the class whose instances this serializer handles
        :param prefix: the unique prefix, letters only
        :param encode: turns an instance into its string payload
        :param decode: turns a string payload back into an instance
        """
        super().__init__(target)

        # fullmatch (instead of match) rejects prefixes that merely start
        # with valid characters, e.g. 'abc!'
        assert self.prefix_characters.fullmatch(prefix)

        self.prefix = prefix
        self.prefix_length = len(self.prefix_format.format(prefix, ''))
        self._encode = encode
        self._decode = decode

    def encode(self, obj: _T) -> str:
        # use the shared template rather than repeating it, so the format,
        # the computed prefix length and the encoder cannot drift apart
        return self.prefix_format.format(self.prefix, self._encode(obj))

    def decode(self, string: str) -> _T:
        # strip the '__prefix__@' part, the remainder is the payload
        return self._decode(string[self.prefix_length:])
class DictionarySerializer(Serializer[_T, 'JSONObject_ro']):
    """ Serialises objects that can be built with keyword arguments.

    For example::

        class Point:

            def __init__(self, x, y):
                self.x = x
                self.y = y

    Can be serialised using::

        DictionarySerializer(Point, ('x', 'y'))

    Which results in something like this in JSON::

        {'x': 1, 'y': 2}

    As the internal __dict__ representation is of no concern, __slots__
    may be used:

        class Point:
            __slots__ = ('x', 'y')

            def __init__(self, x, y):
                self.x = x
                self.y = y

    """

    def __init__(self, target: type[_T], keys: Iterable[str]):
        super().__init__(target)

        # a frozenset, so the key set can itself serve as a dictionary key
        self.keys = frozenset(keys)

    def encode(self, obj: _T) -> JSONObject_ro:
        # reading attributes via getattr works for __dict__ and
        # __slots__ based classes alike
        return {key: getattr(obj, key) for key in self.keys}

    def decode(self, dictionary: JSONObject_ro) -> _T:
        # the target class must accept every key as a keyword argument
        return self.target(**dictionary)
class Serializers:
    """ Organises the different serializer implementations under a unifying
    interface.

    This allows the actual encoder/decoder to call a single class without
    having to worry how the various serializers need to be looked up and
    called.

    """

    # prefix serializers, keyed by their prefix string
    by_prefix: dict[str, PrefixSerializer[Any]]

    # dictionary serializers, keyed by their frozen key set
    by_keys: dict[frozenset[str], DictionarySerializer[Any]]

    # every key-set size seen so far, used as a cheap pre-filter
    known_key_lengths: set[int]

    def __init__(self) -> None:
        self.by_prefix = {}
        self.by_keys = {}
        self.known_key_lengths = set()

    @property
    def registered(self) -> Iterator[AnySerializer[Any]]:
        """ Iterates over all registered serializers. """
        return chain(self.by_prefix.values(), self.by_keys.values())

    def register(
        self,
        serializer: PrefixSerializer[Any] | DictionarySerializer[Any]
    ) -> None:
        """ Files the given serializer into the matching lookup table. """
        if isinstance(serializer, PrefixSerializer):
            self.by_prefix[serializer.prefix] = serializer
        elif isinstance(serializer, DictionarySerializer):
            self.by_keys[serializer.keys] = serializer
            self.known_key_lengths.add(len(serializer.keys))
        else:
            raise NotImplementedError

    def serializer_for(
        self,
        value: object
    ) -> AnySerializer[Any] | None:
        """ Dispatches to the lookup matching the value's type, returning
        None if no serializer applies.

        """
        if isinstance(value, str):
            return self.serializer_for_string(value)
        if isinstance(value, dict):
            return self.serializer_for_dict(value)
        if isinstance(value, type):
            return self.serializer_for_class(value)
        return None

    def serializer_for_string(
        self,
        string: str
    ) -> PrefixSerializer[Any] | None:
        """ Returns the serializer whose prefix starts the string, if any.

        """
        found = PrefixSerializer.prefix_expression.match(string)
        if found is None:
            return None
        return self.by_prefix.get(found.group('prefix'))

    def serializer_for_dict(
        self,
        dictionary: dict[str, Any]
    ) -> DictionarySerializer[Any] | None:
        """ Returns the serializer matching the dictionary's keys, if any.

        """
        # we can exit early for all dictionaries which cannot possibly match
        # the keys we're looking for by comparing the number of keys in the
        # dictionary - this is much cheaper than the next lookup
        if len(dictionary) not in self.known_key_lengths:
            return None

        return self.by_keys.get(frozenset(dictionary.keys()))

    @instance_lru_cache(maxsize=16)
    def serializer_for_class(
        self,
        cls: type[_T]
    ) -> AnySerializer[_T] | None:
        """ Returns the first registered serializer whose target the given
        class subclasses, if any - registration order decides ties.

        """
        for candidate in self.registered:
            if issubclass(cls, candidate.target):
                return candidate
        return None

    def encode(self, value: object) -> JSON_ro:
        """ Encodes the given value, raising a TypeError when no
        serializer is registered for its class.

        """
        serializer = self.serializer_for(value.__class__)
        if serializer:
            return serializer.encode(value)

        if isinstance(value, types.GeneratorType):
            # FIXME: this is a little sus, generators will only work for
            #        types that are already JSON serializable, since we
            #        don't try to get a serializer for each generated value
            return tuple(value)

        raise TypeError('{} is not JSON serializable'.format(repr(value)))

    def decode(self, value: Any) -> Any:
        """ Decodes the given value, recursing into dictionaries. """
        serializer = self.serializer_for(value)
        if serializer:
            return serializer.decode(value)

        if isinstance(value, dict):
            # decode the values in place - the keys are left untouched
            for key in value:
                value[key] = self.decode(value[key])

        return value
# The builtin serializers
# FIXME: is the aim of these serializers to be fast, or should the
#        output be human readable? If it's the former, then we should
#        probably change some of these serializers, the iso format
#        is not very efficient in deserialization, a pair with a
#        timestamp and the timezone would be much faster


default_serializers = Serializers()

# NOTE: registration order matters - serializer_for_class returns the
# first registered serializer whose target the class subclasses, and
# datetime.datetime is a subclass of datetime.date, so 'datetime' must
# be registered before 'date' to win the lookup
default_serializers.register(PrefixSerializer(
    prefix='datetime',
    target=datetime.datetime,
    encode=isodate.datetime_isoformat,
    decode=isodate.parse_datetime
))

default_serializers.register(PrefixSerializer(
    prefix='time',
    target=datetime.time,
    encode=isodate.time_isoformat,
    decode=isodate.parse_time
))

default_serializers.register(PrefixSerializer(
    prefix='date',
    target=datetime.date,
    encode=isodate.date_isoformat,
    decode=isodate.parse_date
))

default_serializers.register(PrefixSerializer(
    prefix='decimal',
    target=Decimal,
    encode=str,
    decode=Decimal
))
class Serializable:
    """ Classes inheriting from this base are serialised using the
    :class:`DictionarySerializer` class.

    The keys that should be used need to be specified as follows::

        class Point(Serializable, keys=('x', 'y')):

            def __init__(self, x, y):
                self.x = x
                self.y = y

    """

    # the attribute names included in the serialized representation
    serialized_keys: ClassVar[Collection[str]]

    @classmethod
    def serializers(cls) -> Serializers:
        """ Returns the registry new subclasses register with. """
        return default_serializers  # for testing

    def __init_subclass__(cls, keys: Collection[str], **kwargs: Any):
        super().__init_subclass__(**kwargs)

        # every subclass records its keys and immediately registers a
        # matching dictionary serializer for itself
        cls.serialized_keys = keys

        serializer = DictionarySerializer(target=cls, keys=keys)
        cls.serializers().register(serializer)
# FIXME: We should probably add type annotations for the keyword
#        parameters we care about for the functions below


@overload
def dumps(obj: None, **extra: Any) -> None: ...
@overload
def dumps(obj: Any, **extra: Any) -> str: ...


def dumps(obj: Any | None, **extra: Any) -> str | None:
    """ Serializes the given object to a compact JSON string, using the
    default serializers for types the stdlib encoder cannot handle.

    ``None`` input yields ``None`` (not the JSON string 'null').
    """
    if obj is None:
        return None

    return json.dumps(
        obj,
        default=default_serializers.encode,
        separators=(',', ':'),
        **extra
    )
def loads(txt: str | bytes | bytearray | None, **extra: Any) -> Any:
    """ Deserializes the given JSON text, decoding any values produced
    by the default serializers.

    ``None`` input yields an empty dictionary (note the asymmetry with
    :func:`dumps`, which maps ``None`` to ``None``).
    """
    if txt is None:
        return {}

    return json.loads(
        txt,
        object_hook=default_serializers.decode,
        **extra
    )
def dump(data: Any, fp: SupportsWrite[str], **extra: Any) -> None:
    """ Serializes the given data to JSON and writes it to the given
    file-like object.

    :param data: the object to serialize
    :param fp: a writable object supporting ``write(str)``
    :param extra: additional keyword arguments passed on to :func:`dumps`
    """
    text = dumps(data, **extra)

    # dumps() maps None to None by design - previously that None was
    # passed straight to fp.write(), raising a TypeError; instead we
    # simply write nothing for None input
    if text is not None:
        fp.write(text)
def load(fp: SupportsRead[str | bytes], **extra: Any) -> Any:
    """ Reads the given file-like object in full and deserializes its
    content with :func:`loads`.
    """
    content = fp.read()
    return loads(content, **extra)