Module refinery.lib.json
This module provides JSON encoding and decoding. All refinery units should use this interface rather than the standard library JSON module. It first attempts to use the orJSON external library as backend, which is much faster, and then falls back to the standard library if orJSON is not available.
Expand source code Browse git
"""
This module provides JSON encoding and decoding. All refinery units should use this interface
rather than the standard library JSON module. It first attempts to use the orJSON external library
as backend, which is much faster, and then falls back to the standard library if orJSON is not
available.
"""
from __future__ import annotations
import codecs
import json as pyjson
from datetime import date, datetime, time
from enum import Enum, IntFlag
from uuid import UUID
from refinery.lib.shared import orjson
from refinery.lib.tools import isbuffer
from refinery.lib.types import Any, Callable, Generator
def flattened(data: dict, prefix: str = '', separator: str = '.'):
"""
Yield the rows of a flattened view for the input JSON dictionary. This is used by several
refinery units to display a tabular view of what would otherwise be output as JSON.
"""
def flatten(
cursor: dict | list | str | int | float | bool, prefix: str
) -> Generator[tuple[str, int | float | bool | str]]:
if isinstance(cursor, dict):
for key, value in cursor.items():
new_prefix = key if not prefix else F'{prefix}{separator}{key}'
yield from flatten(value, new_prefix)
elif isinstance(cursor, list):
width = len(F'{len(cursor) - 1:X}')
for key, value in enumerate(cursor):
yield from flatten(value, F'{prefix}[0x{key:0{width}X}]')
else:
yield (prefix, cursor)
yield from flatten(data, prefix)
def _common_conversions(o):
    """
    Conversion step that is shared between dictionary keys and values. Enum members are replaced
    by their name, `datetime` objects by their ISO representation with a space as separator and
    truncated to seconds, `time` objects by their ISO representation truncated to seconds, and
    `date` objects by their ISO representation. The return value is `None` for any other input,
    which callers use to detect that no conversion applied.
    """
    if isinstance(o, Enum):
        converted = o.name
    elif isinstance(o, datetime):
        # this test has to precede the one for date because datetime is a subclass of date
        converted = o.isoformat(' ', 'seconds')
    elif isinstance(o, time):
        converted = o.isoformat('seconds')
    elif isinstance(o, date):
        converted = o.isoformat()
    else:
        converted = None
    return converted
def convert_key(k: Enum | datetime | date | time | int | float | bool | str) -> str:
    """
    Turn a dictionary key into a string so that it can be used as a JSON object key. Keys that
    `_common_conversions` knows how to handle (enum members, `datetime`, `date`, and `time`
    objects) use that conversion; every other key is passed through `str`.
    """
    converted = _common_conversions(k)
    if converted is None:
        return str(k)
    return converted
def standard_conversions(o):
    """
    Default handler for objects that a JSON backend cannot serialize by itself. Enum members and
    `datetime`, `date`, and `time` objects are converted to strings by `_common_conversions`,
    `IntFlag` values become the list of names of all flags they contain, `UUID` objects are
    converted to their canonical string representation, and `set`, `tuple`, and `frozenset`
    objects are converted to `list`s. A `TypeError` is raised for every other type. Other
    serialization of standard object types should be added here.
    """
    converted = _common_conversions(o)
    if converted is not None:
        return converted
    # NOTE(review): IntFlag derives from Enum, so any flag value whose `name` is not None was
    # already returned as a string above and does not reach this branch - confirm this is wanted.
    if isinstance(o, IntFlag):
        return [flag.name for flag in o.__class__ if o & flag == flag]
    if isinstance(o, UUID):
        return str(o)
    if not isinstance(o, (set, tuple, frozenset)):
        raise TypeError
    return list(o)
def preprocess(o, keys: bool = False):
    """
    This method ensures that no integers outside the 64-bit range are stored within nested
    dictionaries and lists of the input object. The accepted range spans from the smallest signed
    64-bit integer to the largest unsigned 64-bit integer; integers outside of it are converted
    to hexadecimal string representations with prefix, as produced by `hex`.

    When the `keys` option is set, the method also uses `refinery.lib.json.convert_key` to turn
    all non-string keys in dictionaries into strings. This applies on every nesting level,
    including dictionaries that are stored under a key which itself had to be converted.

    Dictionaries and lists are modified in place and returned; for any other input, the return
    value is either the input itself or its replacement string.
    """
    if isinstance(o, dict):
        if not keys:
            # existing keys are reassigned only, so the dictionary size is stable while iterating
            for k, v in o.items():
                o[k] = preprocess(v, keys=False)
        else:
            invalid_keys = []
            for k, v in o.items():
                if not isinstance(k, str):
                    invalid_keys.append(k)
                else:
                    o[k] = preprocess(v, keys=True)
            # keys are replaced only after the iteration because this changes the dictionary
            for k in invalid_keys:
                o[convert_key(k)] = preprocess(o.pop(k), keys=True)
    elif isinstance(o, list):
        for k, v in enumerate(o):
            o[k] = preprocess(v, keys=keys)
    elif isinstance(o, int) and not -0x8000000000000000 <= o <= 0xFFFFFFFFFFFFFFFF:
        # a bit_length test would miss negative values between -2**64 and -2**63
        return hex(o)
    return o
def py_json_dumps(
object,
pretty: bool = True,
checks: bool = True,
tojson: Callable[[Any], Any] | None = None,
) -> bytes:
"""
This is the JSON dump method wrapper which is based on the standard library backend. It is
exposed separately to allow testing.
"""
if (enc := tojson) is not None:
class encoder(pyjson.JSONEncoder):
default = staticmethod(tojson) # type:ignore
enc = encoder
if checks:
object = preprocess(object, keys=True)
if pretty:
out = pyjson.dumps(object, ensure_ascii=False, cls=enc, indent=2)
else:
out = pyjson.dumps(object, ensure_ascii=False, cls=enc, indent=None, separators=(',', ':'))
return out.encode('utf8')
# Select the JSON backend: orjson is preferred, the standard library is the fallback.
# NOTE(review): the attribute access on `orjson` is expected to raise ImportError when the
# library is not installed; presumably refinery.lib.shared provides a lazy module proxy with
# that behavior - confirm.
try:
    _or_json_loads = orjson.loads
    _or_json_dumps = orjson.dumps
except ImportError:
    dumps = py_json_dumps
    loads = pyjson.loads
else:
    def __loads(data):
        if isinstance(data, str):
            # a memoryview cannot wrap text, but orjson parses str input directly; this keeps
            # the accepted input types in line with the standard library fallback
            return _or_json_loads(data)
        # orjson does not like subclasses of bytearray, and we do that a lot
        return _or_json_loads(memoryview(data))

    def __dumps(
        object,
        pretty: bool = True,
        checks: bool = True,
        tojson: Callable[[Any], Any] | None = None,
    ):
        # objects that orjson does not serialize natively are passed to this handler
        default = tojson or standard_conversions
        options = (
            0
            | orjson.OPT_PASSTHROUGH_DATETIME
            | orjson.OPT_NON_STR_KEYS
            | orjson.OPT_OMIT_MICROSECONDS
            | orjson.OPT_SERIALIZE_DATACLASS
            | orjson.OPT_SERIALIZE_UUID
        )
        if pretty:
            options |= orjson.OPT_INDENT_2
        if checks:
            # no key conversion is requested here since OPT_NON_STR_KEYS is set above
            object = preprocess(object)
        return _or_json_dumps(
            object,
            option=options,
            default=default,
        )

    loads = __loads
    dumps = __dumps
def bytes_as_array(o):
    """
    A default handler that turns every object accepted by `isbuffer` into a list of integers,
    one per item and each masked to its low 8 bits. All other objects are delegated to
    `standard_conversions`.
    """
    if not isbuffer(o):
        return standard_conversions(o)
    return [int(item & 0xFF) for item in o]
def bytes_as_string(o):
    """
    A default handler that turns every object accepted by `isbuffer` into a string by decoding
    it with the latin1 codec, which maps each byte to the character with the same code point.
    All other objects are delegated to `standard_conversions`.
    """
    if not isbuffer(o):
        return standard_conversions(o)
    return codecs.decode(o, 'latin1')
# Documentation overrides for pdoc: `dumps` and `loads` are bound to backend-specific functions
# at import time, so their public documentation is provided here.
__pdoc__ = {
    'dumps': (
        'A unified proxy method for dumping input data to JSON, using either the orJSON or the '
        'standard library as backend, depending on what is available. The interface more closely '
        'resembles orJSON: The `pretty` option controls whether the output is indented or '
        'minified, and an optional conversion handler can be passed as the `tojson` parameter '
        'to serialize Python objects that are not handled natively by the backend. Finally, the '
        'option `checks` can be set to false to prevent all preprocessing of the input data. Use '
        'it when you are absolutely certain that the input is JSON-serializable and requires no '
        'normalization of any kind.'
    ),
    'loads': (
        'A unified proxy method for loading JSON data as a Python object, using either orJSON '
        'or the standard library backend.'
    ),
}
Functions
def flattened(data, prefix='', separator='.')-
Yield the rows of a flattened view for the input JSON dictionary. This is used by several refinery units to display a tabular view of what would otherwise be output as JSON.
Expand source code Browse git
def flattened(data: dict, prefix: str = '', separator: str = '.'): """ Yield the rows of a flattened view for the input JSON dictionary. This is used by several refinery units to display a tabular view of what would otherwise be output as JSON. """ def flatten( cursor: dict | list | str | int | float | bool, prefix: str ) -> Generator[tuple[str, int | float | bool | str]]: if isinstance(cursor, dict): for key, value in cursor.items(): new_prefix = key if not prefix else F'{prefix}{separator}{key}' yield from flatten(value, new_prefix) elif isinstance(cursor, list): width = len(F'{len(cursor) - 1:X}') for key, value in enumerate(cursor): yield from flatten(value, F'{prefix}[0x{key:0{width}X}]') else: yield (prefix, cursor) yield from flatten(data, prefix) def convert_key(k)-
Conversions of several non-string types for dictionary keys to enable JSON serialization.
Expand source code Browse git
def convert_key(k: Enum | datetime | date | time | int | float | bool | str) -> str: """ Conversions of several non-string types for dictionary keys to enable JSON serialization. """ return str(k) if (t := _common_conversions(k)) is None else t def standard_conversions(o)-
Converts `datetime` and `UUID` objects to their canonical string representations, and also converts `set`, `tuple`, and `frozenset` objects to `list`s for JSON serialization. Other serialization of standard object types should be added here.
Expand source code Browse git
def standard_conversions(o): """ Converts `datetime` and `UUID` objects to their canonical string representations, and also converts `set`. `tuple`. and `frozenset` objects to `list`s for JSON serialization. Other serialization of standard object types should be added here. """ if (t := _common_conversions(o)) is not None: return t if isinstance(o, IntFlag): return [flag.name for flag in o.__class__ if o & flag == flag] if isinstance(o, UUID): return str(o) if isinstance(o, (set, tuple, frozenset)): return list(o) raise TypeError def preprocess(o, keys=False)-
This method ensures that no integers requiring more than 64 bits are stored within nested dictionaries and lists of the input object. Integers that exceed this limit are converted to hexadecimal string representations with prefix.
When the `keys` option is set, the method also uses `convert_key()` to turn all non-string keys in dictionaries into strings.
Expand source code Browse git
def preprocess(o, keys: bool = False): """ This method ensures that no integers requiring more than 64 bits are stored within nested dictionaries and lists of the input object. Integers that exceed this limit are converted to hexadecimal string representations with prefix. When the `keys` option is set, the method also uses `refinery.lib.json.convert_key` to turn all non-string keys in dictionaries into strings. """ if isinstance(o, dict): if not keys: for k, v in o.items(): o[k] = preprocess(v, keys=False) else: invalid_keys = [] for k, v in o.items(): if not isinstance(k, str): invalid_keys.append(k) else: o[k] = preprocess(v, keys=True) for k in invalid_keys: o[convert_key(k)] = preprocess(o.pop(k)) elif isinstance(o, list): for k, v in enumerate(o): o[k] = preprocess(v, keys=keys) elif isinstance(o, int) and o.bit_length() > 64: return hex(o) return o def py_json_dumps(object, pretty=True, checks=True, tojson=None)-
This is the JSON dump method wrapper which is based on the standard library backend. It is exposed separately to allow testing.
Expand source code Browse git
def py_json_dumps( object, pretty: bool = True, checks: bool = True, tojson: Callable[[Any], Any] | None = None, ) -> bytes: """ This is the JSON dump method wrapper which is based on the standard library backend. It is exposed separately to allow testing. """ if (enc := tojson) is not None: class encoder(pyjson.JSONEncoder): default = staticmethod(tojson) # type:ignore enc = encoder if checks: object = preprocess(object, keys=True) if pretty: out = pyjson.dumps(object, ensure_ascii=False, cls=enc, indent=2) else: out = pyjson.dumps(object, ensure_ascii=False, cls=enc, indent=None, separators=(',', ':')) return out.encode('utf8') def loads(data)-
A unified proxy method for loading JSON data as a Python object, using either orJSON or the standard library backend.
Expand source code Browse git
def __loads(data): # orjson does not like subclasses of bytearray, and we do that a lot return _or_json_loads(memoryview(data)) def dumps(object, pretty=True, checks=True, tojson=None)-
A unified proxy method for dumping input data to JSON, using either the orJSON or the standard library as backend, depending on what is available. The interface more closely resembles orJSON: The `pretty` option controls whether the output is indented or minified, and an optional conversion handler can be passed as the `tojson` parameter to serialize Python objects that are not handled natively by the backend. Finally, the option `checks` can be set to false to prevent all preprocessing of the input data. Use it when you are absolutely certain that the input is JSON-serializable and requires no normalization of any kind.
Expand source code Browse git
def __dumps( object, pretty: bool = True, checks: bool = True, tojson: Callable[[Any], Any] | None = None, ): default = tojson or standard_conversions options = ( 0 | orjson.OPT_PASSTHROUGH_DATETIME | orjson.OPT_NON_STR_KEYS | orjson.OPT_OMIT_MICROSECONDS | orjson.OPT_SERIALIZE_DATACLASS | orjson.OPT_SERIALIZE_UUID ) if pretty: options |= orjson.OPT_INDENT_2 if checks: object = preprocess(object) return _or_json_dumps( object, option=options, default=default, ) def bytes_as_array(o)-
A default handler that will convert byte strings to lists of integers.
Expand source code Browse git
def bytes_as_array(o): """ A default handler that will convert byte strings to lists of integers. """ if isbuffer(o): return [int(b & 0xFF) for b in o] return standard_conversions(o) def bytes_as_string(o)-
A default handler that will convert byte strings to 8-bit ASCII encoded strings.
Expand source code Browse git
def bytes_as_string(o): """ A default handler that will convert byte strings to 8-bit ASCII encoded strings. """ if isbuffer(o): return codecs.decode(o, 'latin1') return standard_conversions(o)