Source code for core.custom.custom_json

""" Sigh. Here we go again, *another* json implementation with support for:

- date
- datetime
- time

Because nobody else does all of these. And if they do (like standardjson), they
don't support decoding...

"""

import datetime
import isodate
import json
import re
import types

from decimal import Decimal
from itertools import chain
from onegov.core.cache import instance_lru_cache


from typing import overload, Any, ClassVar, Generic, TypeVar, TYPE_CHECKING
if TYPE_CHECKING:
    from _typeshed import SupportsRead, SupportsWrite
    from collections.abc import Callable, Collection, Iterator, Iterable
    from typing import TypeAlias

    from onegov.core.types import JSON_ro, JSONObject_ro

# Type variable for the class a serializer targets (_T) and for the
# JSON-compatible representation it encodes to (_ST).
_T = TypeVar('_T')
_ST = TypeVar('_ST', bound='JSON_ro')

if TYPE_CHECKING:
    # Union of the concrete serializer implementations. Declared inside a
    # TYPE_CHECKING guard because the `TypeAlias` annotation would otherwise
    # be evaluated at runtime (PEP 526), where `TypeAlias` is not imported
    # and would raise a NameError. The alias is only ever referenced from
    # string annotations, so it is not needed at runtime.
    AnySerializer: TypeAlias = (
        'PrefixSerializer[_T] | DictionarySerializer[_T]')
class Serializer(Generic[_T, _ST]):
    """ Provides a way to encode all objects of a given class or its
    subclasses to and from json.

    Subclasses implement the actual :meth:`encode`/:meth:`decode` pair.

    """

    def __init__(self, target: type[_T]):
        # serializers always target a whole class, never an instance
        assert isinstance(target, type), 'expects a class'
        self.target = target

    def encode(self, obj: _T) -> _ST:
        # to be provided by the concrete serializer
        raise NotImplementedError

    def decode(self, value: _ST) -> _T:
        # to be provided by the concrete serializer
        raise NotImplementedError
class PrefixSerializer(Serializer[_T, str]):
    """ Serializes objects to a string with a prefix.

    Resulting json values take the form of __prefix__@<value>, where <value>
    is the encoded value and __prefix__@ is the prefix that is used to
    differentiate between normal strings and encoded strings.

    Note that the part after the prefix is user-supplied and possibly
    unsafe. So something like an 'eval' should be out of the question!

    """

    prefix_format = '__{}__@{}'
    prefix_expression = re.compile(r'__(?P<prefix>[a-zA-Z]+)__@')

    # NOTE(review): this pattern accepts '-', but `prefix_expression` does
    # not, so a prefix containing a hyphen would encode but never decode —
    # confirm no registered prefix uses a hyphen before tightening either.
    prefix_characters = re.compile(r'[a-zA-Z-]+')

    def __init__(
        self,
        target: type[_T],
        prefix: str,
        encode: 'Callable[[_T], str]',
        decode: 'Callable[[str], _T]',
    ):
        super().__init__(target)

        # `match` only anchors at the start, so this merely checks that the
        # prefix begins with valid characters
        assert self.prefix_characters.match(prefix)

        self.prefix = prefix

        # length of the full prefix marker, used to strip it in decode()
        self.prefix_length = len(self.prefix_format.format(prefix, ''))

        self._encode = encode
        self._decode = decode

    def encode(self, obj: _T) -> str:
        # use the shared format definition instead of a hard-coded copy, so
        # prefix_format and prefix_length cannot drift apart
        return self.prefix_format.format(self.prefix, self._encode(obj))

    def decode(self, string: str) -> _T:
        # strip the prefix marker, then hand the payload to the decoder
        return self._decode(string[self.prefix_length:])
class DictionarySerializer(Serializer[_T, 'JSONObject_ro']):
    """ Serialises objects that can be built with keyword arguments.

    For example::

        class Point:

            def __init__(self, x, y):
                self.x = x
                self.y = y

    Can be serialised using::

        DictionarySerializer(Point, ('x', 'y'))

    Which results in something like this in JSON::

        {'x': 1, 'y': 2}

    As the internal __dict__ representation is of no concern, __slots__
    may be used:

        class Point:

            __slots__ = ('x', 'y')

            def __init__(self, x, y):
                self.x = x
                self.y = y

    """

    def __init__(self, target: type[_T], keys: 'Iterable[str]'):
        super().__init__(target)

        # frozen so the key set can double as a dictionary lookup key
        self.keys = frozenset(keys)

    def encode(self, obj: _T) -> 'JSONObject_ro':
        # read one attribute per configured key
        return {key: getattr(obj, key) for key in self.keys}

    def decode(self, dictionary: 'JSONObject_ro') -> _T:
        # the target class must accept all keys as keyword arguments
        return self.target(**dictionary)
class Serializers:
    """ Organises the different serializer implementations under a unifying
    interface.

    This allows the actual encoder/decoder to call a single class without
    having to worry how the various serializers need to be looked up and
    called.

    """

    # prefix serializers, keyed by their prefix
    by_prefix: dict[str, PrefixSerializer[Any]]

    # dictionary serializers, keyed by their exact (frozen) key set
    by_keys: dict[frozenset[str], DictionarySerializer[Any]]

    # the sizes of all registered key sets, used as a cheap pre-filter
    known_key_lengths: set[int]

    def __init__(self) -> None:
        self.by_prefix = {}
        self.by_keys = {}
        self.known_key_lengths = set()

    @property
    def registered(self) -> 'Iterator[AnySerializer[Any]]':
        """ Yields all registered serializers, prefix serializers first.

        Iteration order follows registration order and matters for
        :meth:`serializer_for_class`, which picks the first match.

        """
        return chain(self.by_prefix.values(), self.by_keys.values())

    def register(
        self,
        serializer: PrefixSerializer[Any] | DictionarySerializer[Any]
    ) -> None:
        """ Adds the given serializer to the appropriate lookup table.

        Raises NotImplementedError for unknown serializer types.

        """
        if isinstance(serializer, PrefixSerializer):
            self.by_prefix[serializer.prefix] = serializer
        elif isinstance(serializer, DictionarySerializer):
            self.by_keys[serializer.keys] = serializer
            self.known_key_lengths.add(len(serializer.keys))
        else:
            raise NotImplementedError

    def serializer_for(
        self,
        value: object
    ) -> 'AnySerializer[Any] | None':
        """ Returns the serializer responsible for the given value (a
        string, a dict or a class), or None if there is no match.

        """
        if isinstance(value, str):
            return self.serializer_for_string(value)
        if isinstance(value, dict):
            return self.serializer_for_dict(value)
        if isinstance(value, type):
            return self.serializer_for_class(value)
        return None

    def serializer_for_string(
        self,
        string: str
    ) -> PrefixSerializer[Any] | None:
        """ Looks up a prefix serializer by the string's prefix marker. """
        match = PrefixSerializer.prefix_expression.match(string)

        if match is None:
            return None

        return self.by_prefix.get(match.group('prefix'))

    def serializer_for_dict(
        self,
        dictionary: dict[str, Any]
    ) -> DictionarySerializer[Any] | None:
        """ Looks up a dictionary serializer by the dict's exact key set. """
        # we can exit early for all dictionaries which cannot possibly match
        # the keys we're looking for by comparing the number of keys in the
        # dictionary - this is much cheaper than the next lookup
        if len(dictionary) not in self.known_key_lengths:
            return None

        return self.by_keys.get(frozenset(dictionary.keys()))

    @instance_lru_cache(maxsize=16)
    def serializer_for_class(
        self,
        cls: type[_T]
    ) -> 'AnySerializer[_T] | None':
        """ Returns the first registered serializer whose target is the
        given class or one of its base classes, or None.

        """
        matches = (s for s in self.registered if issubclass(cls, s.target))
        return next(matches, None)

    def encode(self, value: object) -> 'JSON_ro':
        """ Encodes the given value using the serializer registered for its
        class, raising TypeError if there is none.

        Intended to be passed to json.dumps as the `default` hook.

        """
        serializer = self.serializer_for(value.__class__)

        if serializer:
            return serializer.encode(value)

        if isinstance(value, types.GeneratorType):
            # FIXME: this is a little sus, generators will only work for
            #        types that are already JSON serializable, since we
            #        don't try to get a serializer for each generated value
            return tuple(value)

        raise TypeError('{} is not JSON serializable'.format(repr(value)))

    def decode(self, value: Any) -> Any:
        """ Decodes the given value, recursing into dictionaries.

        Note that plain dictionaries (those without a matching serializer)
        are decoded in place, i.e. mutated, not copied.

        Intended to be passed to json.loads as the `object_hook`.

        """
        serializer = self.serializer_for(value)

        if serializer:
            value = serializer.decode(value)

        elif isinstance(value, dict):
            for k, v in value.items():
                value[k] = self.decode(v)

        return value
# The builtin serializers
# FIXME: is the aim of these serializers to be fast, or should the
#        output be human readable? If it's the former, then we should
#        probably change some of these serializers, the iso format
#        is not very efficient in deserialization, a pair with a
#        timestamp and the timezone would be much faster

# the module-wide default registry, used by `dumps`/`loads` below and by
# `Serializable` subclasses
default_serializers = Serializers()

# NOTE: registration order matters here — `serializer_for_class` returns
# the first serializer whose target is a base class of the value's class,
# and datetime.datetime is a subclass of datetime.date, so 'datetime'
# must be registered before 'date'
default_serializers.register(PrefixSerializer(
    prefix='datetime',
    target=datetime.datetime,
    encode=isodate.datetime_isoformat,
    decode=isodate.parse_datetime
))

default_serializers.register(PrefixSerializer(
    prefix='time',
    target=datetime.time,
    encode=isodate.time_isoformat,
    decode=isodate.parse_time
))

default_serializers.register(PrefixSerializer(
    prefix='date',
    target=datetime.date,
    encode=isodate.date_isoformat,
    decode=isodate.parse_date
))

default_serializers.register(PrefixSerializer(
    prefix='decimal',
    target=Decimal,
    encode=str,
    decode=Decimal
))
class Serializable:
    """ Classes inheriting from this base are serialised using the
    :class:`DictionarySerializer` class.

    The keys that should be used need to be specified as follows::

        class Point(Serializable, keys=('x', 'y')):

            def __init__(self, x, y):
                self.x = x
                self.y = y

    """

    # the keys passed to __init_subclass__, kept around for introspection
    serialized_keys: ClassVar['Collection[str]']

    @classmethod
    def serializers(cls) -> Serializers:
        """ The registry new subclasses register with. """
        return default_serializers  # for testing

    def __init_subclass__(cls, keys: 'Collection[str]', **kwargs: Any):
        super().__init_subclass__(**kwargs)

        cls.serialized_keys = keys

        # every subclass registers itself automatically on definition
        cls.serializers().register(DictionarySerializer(
            target=cls,
            keys=keys
        ))
# FIXME: We should probably add type annotations for the keyword # parameters we care about for the functions below @overload
[docs] def dumps(obj: None, **extra: Any) -> None: ...
@overload def dumps(obj: Any, **extra: Any) -> str: ... def dumps(obj: Any | None, **extra: Any) -> str | None: if obj is not None: return json.dumps( obj, default=default_serializers.encode, separators=(',', ':'), **extra) return None
[docs] def loads(txt: str | bytes | bytearray | None, **extra: Any) -> Any: if txt is not None: return json.loads( txt, object_hook=default_serializers.decode, **extra) return {}
def dump(data: Any, fp: 'SupportsWrite[str]', **extra: Any) -> None:
    """ Serialises the given data as JSON and writes it to the given
    file-like object.

    NOTE(review): `dumps(None)` returns None, so `dump(None, fp)` passes
    None to fp.write and raises a TypeError — confirm callers never pass
    None here.

    """
    text = dumps(data, **extra)
    fp.write(text)
def load(fp: 'SupportsRead[str | bytes]', **extra: Any) -> Any:
    """ Reads JSON from the given file-like object and deserialises it. """
    content = fp.read()
    return loads(content, **extra)