""" Sigh. Here we go again, *another* json implementation with support for:
- date
- datetime
- time
Because nobody else does all of these. And if they do (like standardjson), they
don't support decoding...
"""
from __future__ import annotations
import datetime
import isodate
import json
import re
import types
from decimal import Decimal
from itertools import chain
from onegov.core.cache import instance_lru_cache
from typing import overload, Any, ClassVar, Generic, TypeVar, TYPE_CHECKING
if TYPE_CHECKING:
from _typeshed import SupportsRead, SupportsWrite
from collections.abc import Callable, Collection, Iterator, Iterable
from typing import TypeAlias
from onegov.core.types import JSON_ro, JSONObject_ro
[docs]
AnySerializer: TypeAlias = (
'PrefixSerializer[_T] | DictionarySerializer[_T]')
[docs]
_ST = TypeVar('_ST', bound='JSON_ro')
[docs]
class Serializer(Generic[_T, _ST]):
    """ Provides a way to encode all objects of a given class or its
    subclasses to and from json.

    Subclasses implement :meth:`encode` and :meth:`decode`; this base only
    validates and stores the target class.

    """

    def __init__(self, target: type[_T]):
        assert isinstance(target, type), 'expects a class'

        # the class (or base class) whose instances this serializer handles;
        # used by lookups that match serializers via issubclass
        self.target = target

    def encode(self, obj: _T) -> _ST:
        """ Returns the JSON-compatible representation of *obj*. """
        raise NotImplementedError

    def decode(self, value: _ST) -> _T:
        """ Rebuilds an instance of the target class from *value*. """
        raise NotImplementedError
class PrefixSerializer(Serializer[_T, str]):
    """ Serializes objects to a string with a prefix.

    Resulting json values take the form of __prefix__@<value>, where <value>
    is the encoded value and __prefix__@ is the prefix that is used to
    differentiate between normal strings and encoded strings.

    Note that the part after the prefix is user-supplied and possibly unsafe.
    So something like an 'eval' should be out of the question!

    """

    # template used to build encoded strings and to compute the prefix length
    prefix_format = '__{}__@{}'

    # matches any encoded string, capturing the prefix for lookup
    prefix_expression = re.compile(r'__(?P<prefix>[a-zA-Z]+)__@')

    # the characters a prefix itself may consist of
    prefix_characters = re.compile(r'[a-zA-Z-]+')

    def __init__(
        self,
        target: type[_T],
        prefix: str,
        encode: Callable[[_T], str],
        decode: Callable[[str], _T],
    ):
        super().__init__(target)

        assert self.prefix_characters.match(prefix)

        self.prefix = prefix
        # length of '__prefix__@', so decode can slice off the marker cheaply
        self.prefix_length = len(self.prefix_format.format(prefix, ''))
        self._encode = encode
        self._decode = decode

    def encode(self, obj: _T) -> str:
        return '__{}__@{}'.format(self.prefix, self._encode(obj))

    def decode(self, string: str) -> _T:
        # the prefix has already been matched by the caller's lookup, so we
        # only need to strip it before delegating to the decode callable
        return self._decode(string[self.prefix_length:])
class DictionarySerializer(Serializer[_T, 'JSONObject_ro']):
    """ Serialises objects that can be built with keyword arguments.

    For example::

        class Point:

            def __init__(self, x, y):
                self.x = x
                self.y = y

    Can be serialised using::

        DictionarySerializer(Point, ('x', 'y'))

    Which results in something like this in JSON::

        {'x': 1, 'y': 2}

    As the internal __dict__ represenation is of no concern, __slots__ may
    be used:

        class Point:

            __slots__ = ('x', 'y')

            def __init__(self, x, y):
                self.x = x
                self.y = y

    """

    def __init__(self, target: type[_T], keys: Iterable[str]):
        super().__init__(target)

        # frozen so the key set can serve as a dictionary key during lookup
        self.keys = frozenset(keys)

    def encode(self, obj: _T) -> JSONObject_ro:
        return {k: getattr(obj, k) for k in self.keys}

    def decode(self, dictionary: JSONObject_ro) -> _T:
        # requires that target's __init__ accepts all keys as kwargs
        return self.target(**dictionary)
class Serializers:
    """ Organises the different serializer implementations under a unifiying
    interface. This allows the actual encoder/decoder to call a single class
    without having to worry how the various serializers need to be looked up
    and called.

    """

    # prefix serializers, looked up by their prefix string
    by_prefix: dict[str, PrefixSerializer[Any]]

    # dictionary serializers, looked up by their exact frozen key set
    by_keys: dict[frozenset[str], DictionarySerializer[Any]]

    # the sizes of all registered key sets, used as a cheap pre-filter
    known_key_lengths: set[int]

    def __init__(self) -> None:
        self.by_prefix = {}
        self.by_keys = {}
        self.known_key_lengths = set()

    @property
    def registered(self) -> Iterator[AnySerializer[Any]]:
        """ Iterates over all registered serializers. """
        return chain(self.by_prefix.values(), self.by_keys.values())

    def register(
        self,
        serializer: PrefixSerializer[Any] | DictionarySerializer[Any]
    ) -> None:
        """ Adds the serializer to the lookup table matching its type.

        :raises NotImplementedError: for unsupported serializer types.

        """
        if isinstance(serializer, PrefixSerializer):
            self.by_prefix[serializer.prefix] = serializer
        elif isinstance(serializer, DictionarySerializer):
            self.by_keys[serializer.keys] = serializer
            self.known_key_lengths.add(len(serializer.keys))
        else:
            raise NotImplementedError

    def serializer_for(
        self,
        value: object
    ) -> AnySerializer[Any] | None:
        """ Returns the serializer for the given string, dict or class,
        or None if no serializer matches.

        """
        if isinstance(value, str):
            return self.serializer_for_string(value)
        if isinstance(value, dict):
            return self.serializer_for_dict(value)
        if isinstance(value, type):
            return self.serializer_for_class(value)
        return None

    def serializer_for_string(
        self,
        string: str
    ) -> PrefixSerializer[Any] | None:
        """ Looks up a prefix serializer by the string's prefix marker. """
        match = PrefixSerializer.prefix_expression.match(string)

        if match is None:
            return None

        return self.by_prefix.get(match.group('prefix'))

    def serializer_for_dict(
        self,
        dictionary: dict[str, Any]
    ) -> DictionarySerializer[Any] | None:
        """ Looks up a dictionary serializer by the dict's exact key set. """

        # we can exit early for all dictionaries which cannot possibly match
        # the keys we're looking for by comparing the number of keys in the
        # dictionary - this is much cheaper than the next lookup
        if len(dictionary.keys()) not in self.known_key_lengths:
            return None

        return self.by_keys.get(frozenset(dictionary.keys()))

    @instance_lru_cache(maxsize=16)
    def serializer_for_class(
        self,
        cls: type[_T]
    ) -> AnySerializer[_T] | None:
        """ Returns the first registered serializer whose target is a base
        of *cls* (so subclasses are covered as well).

        NOTE(review): the lru cache means serializers registered after the
        first lookup for a class may not be seen for that class.

        """
        matches = (s for s in self.registered if issubclass(cls, s.target))
        return next(matches, None)

    def encode(self, value: object) -> JSON_ro:
        """ Encodes *value* using its class's serializer.

        :raises TypeError: if no serializer matches.

        """
        serializer = self.serializer_for(value.__class__)

        if serializer:
            return serializer.encode(value)

        if isinstance(value, types.GeneratorType):
            # FIXME: this is a little sus, generators will only work for
            #        types that are already JSON serializable, since we
            #        don't try to get a serializer for each generated value
            return tuple(v for v in value)

        raise TypeError('{} is not JSON serializable'.format(repr(value)))

    def decode(self, value: Any) -> Any:
        """ Decodes *value*, recursing into dicts (mutating them in place)
        when no serializer matches directly.

        """
        serializer = self.serializer_for(value)

        if serializer:
            value = serializer.decode(value)
        elif isinstance(value, dict):
            for k, v in value.items():
                value[k] = self.decode(v)

        return value
# The builtin serializers
# FIXME: is the aim of these serializers to be fast, or should the
# output be human readable? If it's the former, then we should
# probably change some of these serializers, the iso format
# is not very efficient in deserialization, a pair with a
# timestamp and the timezone would be much faster
# the module-wide registry used by dumps/loads below; datetime is registered
# before date on purpose, since datetime.datetime is a subclass of
# datetime.date and serializer_for_class returns the first match
default_serializers = Serializers()

default_serializers.register(PrefixSerializer(
    prefix='datetime',
    target=datetime.datetime,
    encode=isodate.datetime_isoformat,
    decode=isodate.parse_datetime
))

default_serializers.register(PrefixSerializer(
    prefix='time',
    target=datetime.time,
    encode=isodate.time_isoformat,
    decode=isodate.parse_time
))

default_serializers.register(PrefixSerializer(
    prefix='date',
    target=datetime.date,
    encode=isodate.date_isoformat,
    decode=isodate.parse_date
))

default_serializers.register(PrefixSerializer(
    prefix='decimal',
    target=Decimal,
    encode=str,
    decode=Decimal
))
class Serializable:
    """ Classes inheriting from this base are serialised using the
    :class:`DictionarySerializer` class.

    The keys that should be used need to be specified as follows::

        class Point(Serializable, keys=('x', 'y')):

            def __init__(self, x, y):
                self.x = x
                self.y = y

    """

    # the attribute names included in the serialised representation
    serialized_keys: ClassVar[Collection[str]]

    @classmethod
    def serializers(cls) -> Serializers:
        """ Returns the registry new subclasses register with. """
        return default_serializers  # for testing

    def __init_subclass__(cls, keys: Collection[str], **kwargs: Any):
        # each subclass registers itself automatically at definition time
        super().__init_subclass__(**kwargs)

        cls.serialized_keys = keys
        cls.serializers().register(DictionarySerializer(
            target=cls,
            keys=keys
        ))
# FIXME: We should probably add type annotations for the keyword
# parameters we care about for the functions below
@overload
def dumps(obj: None, **extra: Any) -> None: ...
@overload
def dumps(obj: Any, **extra: Any) -> str: ...


def dumps(obj: Any | None, **extra: Any) -> str | None:
    """ Serialises *obj* to a compact JSON string using the default
    serializers; returns None when *obj* is None.

    """
    if obj is not None:
        return json.dumps(
            obj,
            default=default_serializers.encode,
            # compact separators - no whitespace after ',' or ':'
            separators=(',', ':'),
            **extra)

    return None
def loads(txt: str | bytes | bytearray | None, **extra: Any) -> Any:
    """ Deserialises JSON text using the default serializers as the
    object hook; returns an empty dict for None input (note: not None,
    unlike :func:`dumps`).

    """
    if txt is not None:
        return json.loads(
            txt,
            object_hook=default_serializers.decode,
            **extra)

    return {}
def dump(data: Any, fp: SupportsWrite[str], **extra: Any) -> None:
    """ Like :func:`dumps`, but writing to a file-like object. """
    fp.write(dumps(data, **extra))
def load(fp: SupportsRead[str | bytes], **extra: Any) -> Any:
    """ Like :func:`loads`, but reading from a file-like object. """
    return loads(fp.read(), **extra)