Module `refinery.lib.json`

To represent arbitrary data as JSON, these classes extend the built-in json module to support custom encoding of already serializable types.

Expand source code Browse git

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
To represent arbitrary data as JSON, these classes extend the built-in json
module to support custom encoding of already serializable types.
"""
from typing import List, Tuple, Union

import datetime
import json
import re
import uuid

from refinery.lib.types import ByteStr


class JSONEncoderExMeta(type):
    """
    This metaclass is the type of `refinery.lib.json.JSONEncoderEx` and exists in
    order to facilitate a context manager at the type level. Entering the context
    replaces `json.encoder._make_iterencode` with a wrapper that passes a custom
    `isinstance` function to it; leaving the context puts back the function that
    was active when the context was entered. Contexts of the same class may be nested.
    """

    def __enter__(cls):
        """
        Install the patched `json.encoder._make_iterencode` and return the encoder class.
        """
        # Captured in the closure so that each wrapper always delegates to the function
        # it replaced, even when the same class is entered again before it was left.
        previous = json.encoder._make_iterencode

        def _custom_isinstance(obj, tp):
            # Objects claimed by the encoder are reported as matching no type at all.
            if cls.handled(obj):
                return False
            return isinstance(obj, tp)

        def mkiter(*args, **kwargs):
            kwargs.update(isinstance=_custom_isinstance)
            return previous(*args, **kwargs)

        # The stack lives on this very class (it is deliberately not looked up through
        # inheritance) so that nested contexts restore exactly what they replaced.
        stack = cls.__dict__.get('_make_iterencode_stack')
        if stack is None:
            stack = cls._make_iterencode_stack = []
        stack.append(previous)
        cls._make_iterencode_old = previous
        json.encoder._make_iterencode = mkiter
        return cls

    def __exit__(cls, etype, eval, tb):
        """
        Restore the `json.encoder._make_iterencode` that was active when the context was
        entered. Always returns `False`, so exceptions from the body are not suppressed.
        """
        stack = cls.__dict__.get('_make_iterencode_stack')
        previous = stack.pop() if stack else cls._make_iterencode_old
        json.encoder._make_iterencode = previous
        # Keep the legacy attribute pointing at what the enclosing context (if any) saved.
        cls._make_iterencode_old = stack[-1] if stack else previous
        return False

    def dumps(cls, data, indent=4, **kwargs):
        """
        Serialize `data` with `json.dumps`, using this encoder class unless the caller
        passes an explicit `cls` keyword argument. The default indentation is 4.
        """
        kwargs.setdefault('cls', cls)
        return json.dumps(data, indent=indent, **kwargs)


class JSONEncoderEx(json.JSONEncoder, metaclass=JSONEncoderExMeta):
    """
    Base class for JSON encoders used in refinery. Any such encoder can
    be used as a context which temporarily performs a monkey-patch of the
    built-in json module to allow custom encoding of already serializable
    types such as `list` or `dict`. This is done as follows:

        class MyEncoder(JSONEncoderEx):
            pass

        with MyEncoder as encoder:
            return encoder.dumps(data)
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Maps the placeholder strings handed out by encode_raw to the raw text that
        # replaces them in the output of encode.
        self.substitute = {}

    def encode(self, obj):
        """
        Encode the given object as a JSON string. If `obj` is a dictionary with keys
        that are not strings, those keys are converted first: byte strings are decoded
        as ASCII when possible, and everything else is passed through `str`. Only the
        keys of `obj` itself are converted, not those of nested dictionaries. After
        encoding, every quoted placeholder registered via `encode_raw` is replaced by
        its raw representation.
        """
        if isinstance(obj, dict) and not all(isinstance(k, str) for k in obj.keys()):
            def _encode(k):
                if isinstance(k, (bytes, bytearray, memoryview)):
                    try:
                        # bytes() makes memoryview keys decodable as well.
                        return bytes(k).decode('ascii')
                    except ValueError:
                        # Not ASCII (UnicodeDecodeError is a ValueError): fall back
                        # to the generic string conversion below.
                        pass
                return str(k)
            obj = {_encode(key): value for key, value in obj.items()}
        data = super().encode(obj)
        if self.substitute:
            uids = R'''(['"])({})\1'''.format('|'.join(re.escape(u) for u in self.substitute))
            return re.sub(uids, lambda m: self.substitute[m[2]], data)
        return data

    def encode_raw(self, representation):
        """
        Register `representation` as text to be inserted into the output of `encode`
        and return the random UUID string that serves as its placeholder.
        """
        uid = str(uuid.uuid4())
        self.substitute[uid] = representation
        return uid

    def default(self, obj):
        """
        Encode `datetime.datetime` objects as ISO strings with a space separator and a
        precision of seconds; all other objects are delegated to the parent class.
        """
        if isinstance(obj, datetime.datetime):
            return obj.isoformat(' ', 'seconds')
        return super().default(obj)

    @classmethod
    def handled(cls, obj) -> bool:
        """
        Returns whether the given object can be handled by the encoder. When a `refinery.lib.json.JSONEncoderEx` is used as
        a context manager, then it is possible to return `True` for basic types such as `list` to provide custom encodings of
        these types.
        """
        return False


class BytesEncoder(JSONEncoderEx):
    """
    Common base for JSON encoders that serialize `bytes`, `bytearray`, and `memoryview`
    objects. Subclasses choose the representation by implementing `encode_bytes`.
    """

    @classmethod
    def _is_byte_array(cls, obj) -> bool:
        """
        Tell whether the given object is one of the supported binary buffer types.
        """
        binary_types = (bytes, bytearray, memoryview)
        return isinstance(obj, binary_types)

    @classmethod
    def handled(cls, obj) -> bool:
        """
        Claim all byte arrays for this encoder; anything else is left to the parent class.
        """
        if cls._is_byte_array(obj):
            return True
        return super().handled(obj)

    def encode_bytes(self, obj: ByteStr):
        """
        Produce the stand-in value for a byte array; this base class does not implement it.
        """
        raise NotImplementedError

    def default(self, obj):
        """
        Route byte arrays to `encode_bytes` and delegate every other object to the parent.
        """
        if not self._is_byte_array(obj):
            return super().default(obj)
        return self.encode_bytes(obj)


class BytesAsArrayEncoder(BytesEncoder):
    """
    A JSON encoder which renders byte strings as arrays of integers.
    """
    def encode_bytes(self, obj: ByteStr):
        """
        Build the text of an integer array from the given bytes, each value masked to
        the range 0-255, and register it through `encode_raw`. The placeholder returned
        by `encode_raw` is the result.
        """
        numbers = ','.join(str(value & 0xFF) for value in obj)
        return self.encode_raw(F'[{numbers}]')


class BytesAsStringEncoder(BytesEncoder):
    """
    A JSON encoder which renders byte strings as strings, one character per byte.
    """
    def encode_bytes(self, obj: ByteStr):
        """
        Decode the given buffer as latin1. Objects that are neither `bytes` nor
        `bytearray` are converted to `bytes` first; for anything that is not a
        `memoryview`, each item is masked to the range 0-255 in the process.
        """
        if isinstance(obj, (bytes, bytearray)):
            return obj.decode('latin1')
        if isinstance(obj, memoryview):
            raw = bytes(obj)
        else:
            raw = bytes(item & 0xFF for item in obj)
        return raw.decode('latin1')


def flattened(data: dict, prefix='', separator='.') -> List[Tuple[str, Union[int, float, str]]]:
    """
    Flatten a nested structure of dictionaries and lists into pairs of a path and a
    leaf value. Dictionary keys are appended to the path with `separator` in between,
    list items are addressed by an upper-case hexadecimal index such as `[0x0A]` which
    is zero-padded to the width of the largest index in that list. Every value that is
    neither a `dict` nor a `list` counts as a leaf.

    Dictionary keys are converted to strings, so every path is a string even when the
    input contains keys like `0`. The function is a generator: it yields the pairs
    lazily, and empty dictionaries or lists contribute no pairs at all.
    """
    def flatten(cursor, prefix):
        if isinstance(cursor, dict):
            for key, value in cursor.items():
                # str(key) keeps falsy keys such as 0 from vanishing from the path.
                new_prefix = F'{prefix}{separator}{key}' if prefix else str(key)
                yield from flatten(value, new_prefix)
        elif isinstance(cursor, list):
            # Number of hex digits required for the last index of this list.
            width = len(F'{len(cursor) - 1:X}')
            for key, value in enumerate(cursor):
                yield from flatten(value, F'{prefix}[0x{key:0{width}X}]')
        else:
            yield (prefix, cursor)
    yield from flatten(data, prefix)

Functions

def flattened(data, prefix='', separator='.')

Expand source code Browse git

def flattened(data: dict, prefix='', separator='.') -> List[Tuple[str, Union[int, float, str]]]:
    """
    Flatten a nested structure of dictionaries and lists into pairs of a path and a
    leaf value. Dictionary keys are appended to the path with `separator` in between,
    list items are addressed by an upper-case hexadecimal index such as `[0x0A]` which
    is zero-padded to the width of the largest index in that list. Every value that is
    neither a `dict` nor a `list` counts as a leaf.

    Dictionary keys are converted to strings, so every path is a string even when the
    input contains keys like `0`. The function is a generator: it yields the pairs
    lazily, and empty dictionaries or lists contribute no pairs at all.
    """
    def flatten(cursor, prefix):
        if isinstance(cursor, dict):
            for key, value in cursor.items():
                # str(key) keeps falsy keys such as 0 from vanishing from the path.
                new_prefix = F'{prefix}{separator}{key}' if prefix else str(key)
                yield from flatten(value, new_prefix)
        elif isinstance(cursor, list):
            # Number of hex digits required for the last index of this list.
            width = len(F'{len(cursor) - 1:X}')
            for key, value in enumerate(cursor):
                yield from flatten(value, F'{prefix}[0x{key:0{width}X}]')
        else:
            yield (prefix, cursor)
    yield from flatten(data, prefix)

Classes

class JSONEncoderExMeta (*args, **kwargs)

This metaclass is the type of JSONEncoderEx and exists in order to facilitate a context manager at the type level.

Expand source code Browse git

class JSONEncoderExMeta(type):
    """
    This metaclass is the type of `refinery.lib.json.JSONEncoderEx` and exists in
    order to facilitate a context manager at the type level. Entering the context
    replaces `json.encoder._make_iterencode` with a wrapper that passes a custom
    `isinstance` function to it; leaving the context puts back the function that
    was active when the context was entered. Contexts of the same class may be nested.
    """

    def __enter__(cls):
        """
        Install the patched `json.encoder._make_iterencode` and return the encoder class.
        """
        # Captured in the closure so that each wrapper always delegates to the function
        # it replaced, even when the same class is entered again before it was left.
        previous = json.encoder._make_iterencode

        def _custom_isinstance(obj, tp):
            # Objects claimed by the encoder are reported as matching no type at all.
            if cls.handled(obj):
                return False
            return isinstance(obj, tp)

        def mkiter(*args, **kwargs):
            kwargs.update(isinstance=_custom_isinstance)
            return previous(*args, **kwargs)

        # The stack lives on this very class (it is deliberately not looked up through
        # inheritance) so that nested contexts restore exactly what they replaced.
        stack = cls.__dict__.get('_make_iterencode_stack')
        if stack is None:
            stack = cls._make_iterencode_stack = []
        stack.append(previous)
        cls._make_iterencode_old = previous
        json.encoder._make_iterencode = mkiter
        return cls

    def __exit__(cls, etype, eval, tb):
        """
        Restore the `json.encoder._make_iterencode` that was active when the context was
        entered. Always returns `False`, so exceptions from the body are not suppressed.
        """
        stack = cls.__dict__.get('_make_iterencode_stack')
        previous = stack.pop() if stack else cls._make_iterencode_old
        json.encoder._make_iterencode = previous
        # Keep the legacy attribute pointing at what the enclosing context (if any) saved.
        cls._make_iterencode_old = stack[-1] if stack else previous
        return False

    def dumps(cls, data, indent=4, **kwargs):
        """
        Serialize `data` with `json.dumps`, using this encoder class unless the caller
        passes an explicit `cls` keyword argument. The default indentation is 4.
        """
        kwargs.setdefault('cls', cls)
        return json.dumps(data, indent=indent, **kwargs)

Ancestors

builtins.type

Methods

def dumps(cls, data, indent=4, **kwargs)

Expand source code Browse git

def dumps(cls, data, indent=4, **kwargs):
    kwargs.setdefault('cls', cls)
    return json.dumps(data, indent=indent, **kwargs)

class JSONEncoderEx (*args, **kwargs)

Base class for JSON encoders used in refinery. Any such encoder can be used as a context which temporarily performs a monkey-patch of the built-in json module to allow custom encoding of already serializable types such as list or dict. This is done as follows:

class MyEncoder(JSONEncoderEx):
    pass

with MyEncoder as encoder:
    return encoder.dumps(data)

Constructor for JSONEncoder, with sensible defaults.

If skipkeys is false, then it is a TypeError to attempt encoding of keys that are not str, int, float or None. If skipkeys is True, such items are simply skipped.

If ensure_ascii is true, the output is guaranteed to be str objects with all incoming non-ASCII characters escaped. If ensure_ascii is false, the output can contain non-ASCII characters.

If check_circular is true, then lists, dicts, and custom encoded objects will be checked for circular references during encoding to prevent an infinite recursion (which would cause an OverflowError). Otherwise, no such check takes place.

If allow_nan is true, then NaN, Infinity, and -Infinity will be encoded as such. This behavior is not JSON specification compliant, but is consistent with most JavaScript based encoders and decoders. Otherwise, it will be a ValueError to encode such floats.

If sort_keys is true, then the output of dictionaries will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis.

If indent is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. None is the most compact representation.

If specified, separators should be an (item_separator, key_separator) tuple. The default is (', ', ': ') if indent is None and (',', ': ') otherwise. To get the most compact JSON representation, you should specify (',', ':') to eliminate whitespace.

If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable version of the object or raise a TypeError.

Expand source code Browse git

class JSONEncoderEx(json.JSONEncoder, metaclass=JSONEncoderExMeta):
    """
    Base class for JSON encoders used in refinery. Any such encoder can
    be used as a context which temporarily performs a monkey-patch of the
    built-in json module to allow custom encoding of already serializable
    types such as `list` or `dict`. This is done as follows:

        class MyEncoder(JSONEncoderEx):
            pass

        with MyEncoder as encoder:
            return encoder.dumps(data)
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Maps the placeholder strings handed out by encode_raw to the raw text that
        # replaces them in the output of encode.
        self.substitute = {}

    def encode(self, obj):
        """
        Encode the given object as a JSON string. If `obj` is a dictionary with keys
        that are not strings, those keys are converted first: byte strings are decoded
        as ASCII when possible, and everything else is passed through `str`. Only the
        keys of `obj` itself are converted, not those of nested dictionaries. After
        encoding, every quoted placeholder registered via `encode_raw` is replaced by
        its raw representation.
        """
        if isinstance(obj, dict) and not all(isinstance(k, str) for k in obj.keys()):
            def _encode(k):
                if isinstance(k, (bytes, bytearray, memoryview)):
                    try:
                        # bytes() makes memoryview keys decodable as well.
                        return bytes(k).decode('ascii')
                    except ValueError:
                        # Not ASCII (UnicodeDecodeError is a ValueError): fall back
                        # to the generic string conversion below.
                        pass
                return str(k)
            obj = {_encode(key): value for key, value in obj.items()}
        data = super().encode(obj)
        if self.substitute:
            uids = R'''(['"])({})\1'''.format('|'.join(re.escape(u) for u in self.substitute))
            return re.sub(uids, lambda m: self.substitute[m[2]], data)
        return data

    def encode_raw(self, representation):
        """
        Register `representation` as text to be inserted into the output of `encode`
        and return the random UUID string that serves as its placeholder.
        """
        uid = str(uuid.uuid4())
        self.substitute[uid] = representation
        return uid

    def default(self, obj):
        """
        Encode `datetime.datetime` objects as ISO strings with a space separator and a
        precision of seconds; all other objects are delegated to the parent class.
        """
        if isinstance(obj, datetime.datetime):
            return obj.isoformat(' ', 'seconds')
        return super().default(obj)

    @classmethod
    def handled(cls, obj) -> bool:
        """
        Returns whether the given object can be handled by the encoder. When a `refinery.lib.json.JSONEncoderEx` is used as
        a context manager, then it is possible to return `True` for basic types such as `list` to provide custom encodings of
        these types.
        """
        return False

Ancestors

json.encoder.JSONEncoder

Subclasses

BytesEncoder

Static methods

def handled(obj)

Returns whether the given object can be handled by the encoder. When a JSONEncoderEx is used as a context manager, then it is possible to return True for basic types such as list to provide custom encodings of these types.

Expand source code Browse git

@classmethod
def handled(cls, obj) -> bool:
    """
    Tells whether this encoder wants to take care of the given object itself. The base
    implementation claims nothing and always returns `False`. When a
    `refinery.lib.json.JSONEncoderEx` is used as a context manager, an override may return
    `True` for basic types such as `list` in order to provide custom encodings for them.
    """
    return False

Methods

def encode(self, obj)

Return a JSON string representation of a Python data structure.

>>> from json.encoder import JSONEncoder
>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo": ["bar", "baz"]}'

Expand source code Browse git

def encode(self, obj):
    """
    Encode the given object as a JSON string. If `obj` is a dictionary with keys
    that are not strings, those keys are converted first: byte strings are decoded
    as ASCII when possible, and everything else is passed through `str`. Only the
    keys of `obj` itself are converted, not those of nested dictionaries. After
    encoding, every quoted placeholder found in `self.substitute` is replaced by
    the raw representation stored for it.
    """
    if isinstance(obj, dict) and not all(isinstance(k, str) for k in obj.keys()):
        def _encode(k):
            if isinstance(k, (bytes, bytearray, memoryview)):
                try:
                    # bytes() makes memoryview keys decodable as well.
                    return bytes(k).decode('ascii')
                except ValueError:
                    # Not ASCII (UnicodeDecodeError is a ValueError): fall back
                    # to the generic string conversion below.
                    pass
            return str(k)
        obj = {_encode(key): value for key, value in obj.items()}
    data = super().encode(obj)
    if self.substitute:
        uids = R'''(['"])({})\1'''.format('|'.join(re.escape(u) for u in self.substitute))
        return re.sub(uids, lambda m: self.substitute[m[2]], data)
    return data

def encode_raw(self, representation)

Expand source code Browse git

def encode_raw(self, representation):
    """
    Store `representation` in `self.substitute` under a freshly generated random UUID
    string and return that string, which acts as the placeholder for it.
    """
    placeholder = str(uuid.uuid4())
    self.substitute[placeholder] = representation
    return placeholder

def default(self, obj)

Implement this method in a subclass such that it returns a serializable object for o, or calls the base implementation (to raise a TypeError).

For example, to support arbitrary iterators, you could implement default like this::

def default(self, o):
    try:
        iterable = iter(o)
    except TypeError:
        pass
    else:
        return list(iterable)
    # Let the base class default method raise the TypeError
    return JSONEncoder.default(self, o)

Expand source code Browse git

def default(self, obj):
    """
    Encode `datetime.datetime` objects as ISO strings with a space between date and
    time and a precision of seconds. Everything else is passed to the parent class.
    """
    if not isinstance(obj, datetime.datetime):
        return super().default(obj)
    return obj.isoformat(' ', 'seconds')

class BytesEncoder (*args, **kwargs)

A base class for JSON encoders that can encode byte arrays.

Constructor for JSONEncoder, with sensible defaults.

If skipkeys is false, then it is a TypeError to attempt encoding of keys that are not str, int, float or None. If skipkeys is True, such items are simply skipped.

If ensure_ascii is true, the output is guaranteed to be str objects with all incoming non-ASCII characters escaped. If ensure_ascii is false, the output can contain non-ASCII characters.

If sort_keys is true, then the output of dictionaries will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis.

If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable version of the object or raise a TypeError.

Expand source code Browse git

class BytesEncoder(JSONEncoderEx):
    """
    Common base for JSON encoders that serialize `bytes`, `bytearray`, and `memoryview`
    objects. Subclasses choose the representation by implementing `encode_bytes`.
    """

    @classmethod
    def _is_byte_array(cls, obj) -> bool:
        """
        Tell whether the given object is one of the supported binary buffer types.
        """
        binary_types = (bytes, bytearray, memoryview)
        return isinstance(obj, binary_types)

    @classmethod
    def handled(cls, obj) -> bool:
        """
        Claim all byte arrays for this encoder; anything else is left to the parent class.
        """
        if cls._is_byte_array(obj):
            return True
        return super().handled(obj)

    def encode_bytes(self, obj: ByteStr):
        """
        Produce the stand-in value for a byte array; this base class does not implement it.
        """
        raise NotImplementedError

    def default(self, obj):
        """
        Route byte arrays to `encode_bytes` and delegate every other object to the parent.
        """
        if not self._is_byte_array(obj):
            return super().default(obj)
        return self.encode_bytes(obj)

Ancestors

JSONEncoderEx
json.encoder.JSONEncoder

Methods

def encode_bytes(self, obj)

Expand source code Browse git

def encode_bytes(self, obj: ByteStr):
    """
    Produce the stand-in value for a byte array. This implementation is a stub that
    always raises `NotImplementedError`.
    """
    raise NotImplementedError

Inherited members

JSONEncoderEx:
- default
- encode
- handled

class BytesAsArrayEncoder (*args, **kwargs)

This JSON Encoder encodes byte strings as arrays of integers.

Constructor for JSONEncoder, with sensible defaults.

If skipkeys is false, then it is a TypeError to attempt encoding of keys that are not str, int, float or None. If skipkeys is True, such items are simply skipped.

If ensure_ascii is true, the output is guaranteed to be str objects with all incoming non-ASCII characters escaped. If ensure_ascii is false, the output can contain non-ASCII characters.

If sort_keys is true, then the output of dictionaries will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis.

If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable version of the object or raise a TypeError.

Expand source code Browse git

class BytesAsArrayEncoder(BytesEncoder):
    """
    A JSON encoder which renders byte strings as arrays of integers.
    """
    def encode_bytes(self, obj: ByteStr):
        """
        Build the text of an integer array from the given bytes, each value masked to
        the range 0-255, and register it through `encode_raw`. The placeholder returned
        by `encode_raw` is the result.
        """
        numbers = ','.join(str(value & 0xFF) for value in obj)
        return self.encode_raw(F'[{numbers}]')

Ancestors

BytesEncoder
JSONEncoderEx
json.encoder.JSONEncoder

Methods

def encode_bytes(self, obj)

Expand source code Browse git

def encode_bytes(self, obj: ByteStr):
    """
    Build the text of an integer array from the given bytes, each value masked to
    the range 0-255, and register it through `encode_raw`. The placeholder returned
    by `encode_raw` is the result.
    """
    numbers = ','.join(str(value & 0xFF) for value in obj)
    return self.encode_raw(F'[{numbers}]')

Inherited members

BytesEncoder:
- default
- encode
- handled

class BytesAsStringEncoder (*args, **kwargs)

This JSON Encoder encodes byte strings as escaped strings.

Constructor for JSONEncoder, with sensible defaults.

If skipkeys is false, then it is a TypeError to attempt encoding of keys that are not str, int, float or None. If skipkeys is True, such items are simply skipped.

If ensure_ascii is true, the output is guaranteed to be str objects with all incoming non-ASCII characters escaped. If ensure_ascii is false, the output can contain non-ASCII characters.

If sort_keys is true, then the output of dictionaries will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis.

If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable version of the object or raise a TypeError.

Expand source code Browse git

class BytesAsStringEncoder(BytesEncoder):
    """
    A JSON encoder which renders byte strings as strings, one character per byte.
    """
    def encode_bytes(self, obj: ByteStr):
        """
        Decode the given buffer as latin1. Objects that are neither `bytes` nor
        `bytearray` are converted to `bytes` first; for anything that is not a
        `memoryview`, each item is masked to the range 0-255 in the process.
        """
        if isinstance(obj, (bytes, bytearray)):
            return obj.decode('latin1')
        if isinstance(obj, memoryview):
            raw = bytes(obj)
        else:
            raw = bytes(item & 0xFF for item in obj)
        return raw.decode('latin1')

Ancestors

BytesEncoder
JSONEncoderEx
json.encoder.JSONEncoder

Subclasses

Methods

def encode_bytes(self, obj)

Expand source code Browse git

def encode_bytes(self, obj: ByteStr):
    """
    Decode the given buffer as latin1. Objects that are neither `bytes` nor
    `bytearray` are converted to `bytes` first; for anything that is not a
    `memoryview`, each item is masked to the range 0-255 in the process.
    """
    if isinstance(obj, (bytes, bytearray)):
        return obj.decode('latin1')
    if isinstance(obj, memoryview):
        raw = bytes(obj)
    else:
        raw = bytes(item & 0xFF for item in obj)
    return raw.decode('latin1')

Inherited members

BytesEncoder:
- default
- encode
- handled