efro.dataclassio

Functionality for importing, exporting, and validating dataclasses.

This allows complex nested dataclasses to be flattened to json-compatible data and restored from said data. It also gracefully handles and preserves unrecognized attribute data, allowing older clients to interact with newer data formats in a nondestructive manner.

 1# Released under the MIT License. See LICENSE for details.
 2#
 3"""Functionality for importing, exporting, and validating dataclasses.
 4
 5This allows complex nested dataclasses to be flattened to json-compatible
 6data and restored from said data. It also gracefully handles and preserves
 7unrecognized attribute data, allowing older clients to interact with newer
 8data formats in a nondestructive manner.
 9"""
10
11from __future__ import annotations
12
13from efro.util import set_canonical_module_names
14from efro.dataclassio._base import (
15    Codec,
16    IOAttrs,
17    IOExtendedData,
18    IOMultiType,
19    EXTRA_ATTRS_ATTR,
20)
21from efro.dataclassio._prep import (
22    ioprep,
23    ioprepped,
24    will_ioprep,
25    is_ioprepped_dataclass,
26)
27from efro.dataclassio._pathcapture import DataclassFieldLookup
28from efro.dataclassio._api import (
29    JsonStyle,
30    dataclass_to_dict,
31    dataclass_to_json,
32    dataclass_from_dict,
33    dataclass_from_json,
34    dataclass_validate,
35    dataclass_hash,
36)
37
# Public API of the package. Kept alphabetized (uppercase names first)
# so additions are easy to diff; previously 'dataclass_validate' was
# listed ahead of 'dataclass_hash', breaking the ordering convention.
__all__ = [
    'Codec',
    'DataclassFieldLookup',
    'EXTRA_ATTRS_ATTR',
    'IOAttrs',
    'IOExtendedData',
    'IOMultiType',
    'JsonStyle',
    'dataclass_from_dict',
    'dataclass_from_json',
    'dataclass_hash',
    'dataclass_to_dict',
    'dataclass_to_json',
    'dataclass_validate',
    'ioprep',
    'ioprepped',
    'is_ioprepped_dataclass',
    'will_ioprep',
]

# Have these things present themselves cleanly as 'thismodule.SomeClass'
# instead of 'thismodule._internalmodule.SomeClass'.
set_canonical_module_names(globals())
class Codec(Enum):
    """Data formats dataclass data can be exported to or imported from."""

    #: Restrict data to types that map cleanly to/from json: lists,
    #: dicts with str keys, bools, ints, floats, and None.
    JSON = 'json'

    #: Identical to JSON except that bytes and datetime objects pass
    #: through untouched instead of being converted to json-safe forms.
    FIRESTORE = 'firestore'

Specifies expected data format exported to or imported from.

JSON = <Codec.JSON: 'json'>
FIRESTORE = <Codec.FIRESTORE: 'firestore'>
class DataclassFieldLookup(typing.Generic[T]):
 61class DataclassFieldLookup[T]:
 62    """Get info about nested dataclass fields in type-safe way."""
 63
 64    def __init__(self, cls: type[T]) -> None:
 65        self.cls = cls
 66
 67    def path(self, callback: Callable[[T], Any]) -> str:
 68        """Look up a path on child dataclass fields.
 69
 70        example:
 71          DataclassFieldLookup(MyType).path(lambda obj: obj.foo.bar)
 72
 73        The above example will return the string 'foo.bar' or something
 74        like 'f.b' if the dataclasses have custom storage names set.
 75        It will also be static-type-checked, triggering an error if
 76        MyType.foo.bar is not a valid path. Note, however, that the
 77        callback technically allows any return value but only nested
 78        dataclasses and their fields will succeed.
 79        """
 80
 81        # We tell the type system that we are returning an instance
 82        # of our class, which allows it to perform type checking on
 83        # member lookups. In reality, however, we are providing a
 84        # special object which captures path lookups, so we can build
 85        # a string from them.
 86        if not TYPE_CHECKING:
 87            out = callback(_PathCapture(self.cls))
 88            if not isinstance(out, _PathCapture):
 89                raise TypeError(
 90                    f'Expected a valid path under'
 91                    f' the provided object; got a {type(out)}.'
 92                )
 93            return out.path
 94        return ''
 95
 96    def paths(self, callback: Callable[[T], list[Any]]) -> list[str]:
 97        """Look up multiple paths on child dataclass fields.
 98
 99        Functionality is identical to path() but for multiple paths at once.
100
101        example:
102          DataclassFieldLookup(MyType).paths(lambda obj: [obj.foo, obj.bar])
103        """
104        outvals: list[str] = []
105        if not TYPE_CHECKING:
106            outs = callback(_PathCapture(self.cls))
107            assert isinstance(outs, list)
108            for out in outs:
109                if not isinstance(out, _PathCapture):
110                    raise TypeError(
111                        f'Expected a valid path under'
112                        f' the provided object; got a {type(out)}.'
113                    )
114                outvals.append(out.path)
115        return outvals

Get info about nested dataclass fields in type-safe way.

DataclassFieldLookup(cls: 'type[T]')
64    def __init__(self, cls: type[T]) -> None:
65        self.cls = cls
cls
def path(self, callback: 'Callable[[T], Any]') -> str:
67    def path(self, callback: Callable[[T], Any]) -> str:
68        """Look up a path on child dataclass fields.
69
70        example:
71          DataclassFieldLookup(MyType).path(lambda obj: obj.foo.bar)
72
73        The above example will return the string 'foo.bar' or something
74        like 'f.b' if the dataclasses have custom storage names set.
75        It will also be static-type-checked, triggering an error if
76        MyType.foo.bar is not a valid path. Note, however, that the
77        callback technically allows any return value but only nested
78        dataclasses and their fields will succeed.
79        """
80
81        # We tell the type system that we are returning an instance
82        # of our class, which allows it to perform type checking on
83        # member lookups. In reality, however, we are providing a
84        # special object which captures path lookups, so we can build
85        # a string from them.
86        if not TYPE_CHECKING:
87            out = callback(_PathCapture(self.cls))
88            if not isinstance(out, _PathCapture):
89                raise TypeError(
90                    f'Expected a valid path under'
91                    f' the provided object; got a {type(out)}.'
92                )
93            return out.path
94        return ''

Look up a path on child dataclass fields.

example: DataclassFieldLookup(MyType).path(lambda obj: obj.foo.bar)

The above example will return the string 'foo.bar' or something like 'f.b' if the dataclasses have custom storage names set. It will also be static-type-checked, triggering an error if MyType.foo.bar is not a valid path. Note, however, that the callback technically allows any return value but only nested dataclasses and their fields will succeed.

def paths(self, callback: 'Callable[[T], list[Any]]') -> list[str]:
 96    def paths(self, callback: Callable[[T], list[Any]]) -> list[str]:
 97        """Look up multiple paths on child dataclass fields.
 98
 99        Functionality is identical to path() but for multiple paths at once.
100
101        example:
102          DataclassFieldLookup(MyType).paths(lambda obj: [obj.foo, obj.bar])
103        """
104        outvals: list[str] = []
105        if not TYPE_CHECKING:
106            outs = callback(_PathCapture(self.cls))
107            assert isinstance(outs, list)
108            for out in outs:
109                if not isinstance(out, _PathCapture):
110                    raise TypeError(
111                        f'Expected a valid path under'
112                        f' the provided object; got a {type(out)}.'
113                    )
114                outvals.append(out.path)
115        return outvals

Look up multiple paths on child dataclass fields.

Functionality is identical to path() but for multiple paths at once.

example: DataclassFieldLookup(MyType).paths(lambda obj: [obj.foo, obj.bar])

# Attr name under which unrecognized input data is stashed on instances
# (per the module docstring, unknown attrs are preserved so they survive
# a round-trip; presumably this is where they live — confirm in _prep).
EXTRA_ATTRS_ATTR = '_DCIOEXATTRS'
class IOAttrs:
156class IOAttrs:
157    """For specifying io behavior in annotations.
158
159    :param storagename: If passed, is the name used when storing to
160        json/etc.
161
162    :param store_default: Can be set to False to avoid writing values
163        when equal to the default value. Note that this requires the
164        dataclass field to define a default or default_factory or for
165        its IOAttrs to define a soft_default value.
166
167    :param whole_days: If True, requires datetime values to be exactly
168        on day boundaries (see efro.util.utc_today()).
169
170    :param whole_hours: If True, requires datetime values to lie exactly
171        on hour boundaries (see efro.util.utc_this_hour()).
172
173    :param whole_minutes: If True, requires datetime values to lie
174        exactly on minute boundaries (see efro.util.utc_this_minute()).
175
176    :param soft_default: If passed, injects a default value into
177        dataclass instantiation when the field is not present in the
178        input data. This allows dataclasses to add new non-optional
179        fields while gracefully 'upgrading' old data. Note that when a
180        soft_default is present it will take precedence over field
181        defaults when determining whether to store a value for a field
182        with store_default=False (since the soft_default value is what
183        we'll get when reading that same data back in when the field is
184        omitted).
185
186    :param soft_default_factory: Is similar to 'default_factory' in
187        dataclass fields; it should be used instead of 'soft_default'
188        for mutable types such as lists to prevent a single default
189        object from unintentionally changing over time.
190
191    :param enum_fallback: If provided, specifies an enum value that can
192        be substituted in the case of unrecognized input values. This
193        can allow newer data to remain loadable in older environments.
194        Note that 'lossy' must be enabled in the top level load call for
195        this to apply, since it can fundamentally modify data.
196    """
197
198    # A sentinel object to detect if a parameter is supplied or not. Use
199    # a class to give it a better repr.
200    class _MissingType:
201
202        @override
203        def __repr__(self) -> str:
204            return '<MISSING>'
205
206    MISSING = _MissingType()
207
208    storagename: str | None = None
209    store_default: bool = True
210    whole_days: bool = False
211    whole_hours: bool = False
212    whole_minutes: bool = False
213    soft_default: Any = MISSING
214    soft_default_factory: Callable[[], Any] | _MissingType = MISSING
215    enum_fallback: Enum | None = None
216
217    def __init__(
218        self,
219        storagename: str | None = storagename,
220        *,
221        store_default: bool = store_default,
222        whole_days: bool = whole_days,
223        whole_hours: bool = whole_hours,
224        whole_minutes: bool = whole_minutes,
225        soft_default: Any = MISSING,
226        soft_default_factory: Callable[[], Any] | _MissingType = MISSING,
227        enum_fallback: Enum | None = None,
228    ):
229        # Only store values that differ from class defaults to keep
230        # our instances nice and lean.
231        cls = type(self)
232        if storagename != cls.storagename:
233            self.storagename = storagename
234        if store_default != cls.store_default:
235            self.store_default = store_default
236        if whole_days != cls.whole_days:
237            self.whole_days = whole_days
238        if whole_hours != cls.whole_hours:
239            self.whole_hours = whole_hours
240        if whole_minutes != cls.whole_minutes:
241            self.whole_minutes = whole_minutes
242        if soft_default is not cls.soft_default:
243            # Do what dataclasses does with its default types and
244            # tell the user to use factory for mutable ones.
245            if isinstance(soft_default, (list, dict, set)):
246                raise ValueError(
247                    f'mutable {type(soft_default)} is not allowed'
248                    f' for soft_default; use soft_default_factory.'
249                )
250            self.soft_default = soft_default
251        if soft_default_factory is not cls.soft_default_factory:
252            self.soft_default_factory = soft_default_factory
253            if self.soft_default is not cls.soft_default:
254                raise ValueError(
255                    'Cannot set both soft_default and soft_default_factory'
256                )
257        if enum_fallback is not cls.enum_fallback:
258            self.enum_fallback = enum_fallback
259
260    def validate_for_field(self, cls: type, field: dataclasses.Field) -> None:
261        """Ensure the IOAttrs is ok to use with provided field."""
262
263        # Turning off store_default requires the field to have either a
264        # default or a default_factory or for us to have soft
265        # equivalents.
266
267        if not self.store_default:
268            field_default_factory: Any = field.default_factory
269            if (
270                field_default_factory is dataclasses.MISSING
271                and field.default is dataclasses.MISSING
272                and self.soft_default is self.MISSING
273                and self.soft_default_factory is self.MISSING
274            ):
275                raise TypeError(
276                    f'Field {field.name} of {cls} has'
277                    f' neither a default nor a default_factory'
278                    f' and IOAttrs contains neither a soft_default'
279                    f' nor a soft_default_factory;'
280                    f' store_default=False cannot be set for it.'
281                )
282
283    def validate_datetime(
284        self, value: datetime.datetime, fieldpath: str
285    ) -> None:
286        """Ensure a datetime value meets our value requirements."""
287        if self.whole_days:
288            if any(
289                x != 0
290                for x in (
291                    value.hour,
292                    value.minute,
293                    value.second,
294                    value.microsecond,
295                )
296            ):
297                raise ValueError(
298                    f'Value {value} at {fieldpath} is not a whole day.'
299                )
300        elif self.whole_hours:
301            if any(
302                x != 0 for x in (value.minute, value.second, value.microsecond)
303            ):
304                raise ValueError(
305                    f'Value {value} at {fieldpath}' f' is not a whole hour.'
306                )
307        elif self.whole_minutes:
308            if any(x != 0 for x in (value.second, value.microsecond)):
309                raise ValueError(
310                    f'Value {value} at {fieldpath}' f' is not a whole minute.'
311                )

For specifying io behavior in annotations.

Parameters
  • storagename: If passed, is the name used when storing to json/etc.

  • store_default: Can be set to False to avoid writing values when equal to the default value. Note that this requires the dataclass field to define a default or default_factory or for its IOAttrs to define a soft_default value.

  • whole_days: If True, requires datetime values to be exactly on day boundaries (see efro.util.utc_today()).

  • whole_hours: If True, requires datetime values to lie exactly on hour boundaries (see efro.util.utc_this_hour()).

  • whole_minutes: If True, requires datetime values to lie exactly on minute boundaries (see efro.util.utc_this_minute()).

  • soft_default: If passed, injects a default value into dataclass instantiation when the field is not present in the input data. This allows dataclasses to add new non-optional fields while gracefully 'upgrading' old data. Note that when a soft_default is present it will take precedence over field defaults when determining whether to store a value for a field with store_default=False (since the soft_default value is what we'll get when reading that same data back in when the field is omitted).

  • soft_default_factory: Is similar to 'default_factory' in dataclass fields; it should be used instead of 'soft_default' for mutable types such as lists to prevent a single default object from unintentionally changing over time.

  • enum_fallback: If provided, specifies an enum value that can be substituted in the case of unrecognized input values. This can allow newer data to remain loadable in older environments. Note that 'lossy' must be enabled in the top level load call for this to apply, since it can fundamentally modify data.

IOAttrs( storagename: str | None = None, *, store_default: bool = True, whole_days: bool = False, whole_hours: bool = False, whole_minutes: bool = False, soft_default: Any = <MISSING>, soft_default_factory: Union[Callable[[], Any], efro.dataclassio._base.IOAttrs._MissingType] = <MISSING>, enum_fallback: enum.Enum | None = None)
217    def __init__(
218        self,
219        storagename: str | None = storagename,
220        *,
221        store_default: bool = store_default,
222        whole_days: bool = whole_days,
223        whole_hours: bool = whole_hours,
224        whole_minutes: bool = whole_minutes,
225        soft_default: Any = MISSING,
226        soft_default_factory: Callable[[], Any] | _MissingType = MISSING,
227        enum_fallback: Enum | None = None,
228    ):
229        # Only store values that differ from class defaults to keep
230        # our instances nice and lean.
231        cls = type(self)
232        if storagename != cls.storagename:
233            self.storagename = storagename
234        if store_default != cls.store_default:
235            self.store_default = store_default
236        if whole_days != cls.whole_days:
237            self.whole_days = whole_days
238        if whole_hours != cls.whole_hours:
239            self.whole_hours = whole_hours
240        if whole_minutes != cls.whole_minutes:
241            self.whole_minutes = whole_minutes
242        if soft_default is not cls.soft_default:
243            # Do what dataclasses does with its default types and
244            # tell the user to use factory for mutable ones.
245            if isinstance(soft_default, (list, dict, set)):
246                raise ValueError(
247                    f'mutable {type(soft_default)} is not allowed'
248                    f' for soft_default; use soft_default_factory.'
249                )
250            self.soft_default = soft_default
251        if soft_default_factory is not cls.soft_default_factory:
252            self.soft_default_factory = soft_default_factory
253            if self.soft_default is not cls.soft_default:
254                raise ValueError(
255                    'Cannot set both soft_default and soft_default_factory'
256                )
257        if enum_fallback is not cls.enum_fallback:
258            self.enum_fallback = enum_fallback
MISSING = <MISSING>
storagename: str | None = None
store_default: bool = True
whole_days: bool = False
whole_hours: bool = False
whole_minutes: bool = False
soft_default: Any = <MISSING>
soft_default_factory: Union[Callable[[], Any], efro.dataclassio._base.IOAttrs._MissingType] = <MISSING>
enum_fallback: enum.Enum | None = None
def validate_for_field(self, cls: type, field: dataclasses.Field) -> None:
260    def validate_for_field(self, cls: type, field: dataclasses.Field) -> None:
261        """Ensure the IOAttrs is ok to use with provided field."""
262
263        # Turning off store_default requires the field to have either a
264        # default or a default_factory or for us to have soft
265        # equivalents.
266
267        if not self.store_default:
268            field_default_factory: Any = field.default_factory
269            if (
270                field_default_factory is dataclasses.MISSING
271                and field.default is dataclasses.MISSING
272                and self.soft_default is self.MISSING
273                and self.soft_default_factory is self.MISSING
274            ):
275                raise TypeError(
276                    f'Field {field.name} of {cls} has'
277                    f' neither a default nor a default_factory'
278                    f' and IOAttrs contains neither a soft_default'
279                    f' nor a soft_default_factory;'
280                    f' store_default=False cannot be set for it.'
281                )

Ensure the IOAttrs is ok to use with provided field.

def validate_datetime(self, value: datetime.datetime, fieldpath: str) -> None:
283    def validate_datetime(
284        self, value: datetime.datetime, fieldpath: str
285    ) -> None:
286        """Ensure a datetime value meets our value requirements."""
287        if self.whole_days:
288            if any(
289                x != 0
290                for x in (
291                    value.hour,
292                    value.minute,
293                    value.second,
294                    value.microsecond,
295                )
296            ):
297                raise ValueError(
298                    f'Value {value} at {fieldpath} is not a whole day.'
299                )
300        elif self.whole_hours:
301            if any(
302                x != 0 for x in (value.minute, value.second, value.microsecond)
303            ):
304                raise ValueError(
305                    f'Value {value} at {fieldpath}' f' is not a whole hour.'
306                )
307        elif self.whole_minutes:
308            if any(x != 0 for x in (value.second, value.microsecond)):
309                raise ValueError(
310                    f'Value {value} at {fieldpath}' f' is not a whole minute.'
311                )

Ensure a datetime value meets our value requirements.

class IOExtendedData:
46class IOExtendedData:
47    """A class types can inherit from for extra functionality."""
48
49    def will_output(self) -> None:
50        """Called before data is sent to an outputter.
51
52        Can be overridden to validate or filter data before
53        sending it on its way.
54        """
55
56    @classmethod
57    def will_input(cls, data: dict) -> None:
58        """Called on data before a class instance is created from it.
59
60        Can be overridden to migrate old data formats to new, etc.
61        """
62
63    def did_input(self) -> None:
64        """Called on a class instance after created from data.
65
66        Can be useful to correct values from the db, etc. in the
67        type-safe form.
68        """
69
70    # pylint: disable=useless-return
71
72    @classmethod
73    def handle_input_error(cls, exc: Exception) -> Self | None:
74        """Called when an error occurs during input decoding.
75
76        This allows a type to optionally return substitute data
77        to be used in place of the failed decode. If it returns
78        None, the original exception is re-raised.
79
80        It is generally a bad idea to apply catch-alls such as this,
81        as it can lead to silent data loss. This should only be used
82        in specific cases such as user settings where an occasional
83        reset is harmless and is preferable to keeping all contained
84        enums and other values backward compatible indefinitely.
85        """
86        del exc  # Unused.
87
88        # By default we let things fail.
89        return None
90
91    # pylint: enable=useless-return

A class types can inherit from for extra functionality.

def will_output(self) -> None:
49    def will_output(self) -> None:
50        """Called before data is sent to an outputter.
51
52        Can be overridden to validate or filter data before
53        sending it on its way.
54        """

Called before data is sent to an outputter.

Can be overridden to validate or filter data before sending it on its way.

@classmethod
def will_input(cls, data: dict) -> None:
56    @classmethod
57    def will_input(cls, data: dict) -> None:
58        """Called on data before a class instance is created from it.
59
60        Can be overridden to migrate old data formats to new, etc.
61        """

Called on data before a class instance is created from it.

Can be overridden to migrate old data formats to new, etc.

def did_input(self) -> None:
63    def did_input(self) -> None:
64        """Called on a class instance after created from data.
65
66        Can be useful to correct values from the db, etc. in the
67        type-safe form.
68        """

Called on a class instance after created from data.

Can be useful to correct values from the db, etc. in the type-safe form.

@classmethod
def handle_input_error(cls, exc: Exception) -> Optional[Self]:
72    @classmethod
73    def handle_input_error(cls, exc: Exception) -> Self | None:
74        """Called when an error occurs during input decoding.
75
76        This allows a type to optionally return substitute data
77        to be used in place of the failed decode. If it returns
78        None, the original exception is re-raised.
79
80        It is generally a bad idea to apply catch-alls such as this,
81        as it can lead to silent data loss. This should only be used
82        in specific cases such as user settings where an occasional
83        reset is harmless and is preferable to keeping all contained
84        enums and other values backward compatible indefinitely.
85        """
86        del exc  # Unused.
87
88        # By default we let things fail.
89        return None

Called when an error occurs during input decoding.

This allows a type to optionally return substitute data to be used in place of the failed decode. If it returns None, the original exception is re-raised.

It is generally a bad idea to apply catch-alls such as this, as it can lead to silent data loss. This should only be used in specific cases such as user settings where an occasional reset is harmless and is preferable to keeping all contained enums and other values backward compatible indefinitely.

class IOMultiType(Generic[EnumT]):
    """A base class for types that can map to multiple dataclass types.

    This enables usage of high level base classes (for example a
    'Message' type) in annotations, with dataclassio automatically
    serializing & deserializing dataclass subclasses based on their type
    ('MessagePing', 'MessageChat', etc.)

    Standard usage: create a class inheriting from this one to act as a
    'registry', then derive dataclass classes from that registry class.
    Dataclassio then does the right thing when the registry class
    appears in type annotations.

    See tests/test_efro/test_dataclassio.py for examples.
    """

    @classmethod
    def get_type(cls, type_id: EnumT) -> type[Self]:
        """Return a specific subclass given a type-id."""
        raise NotImplementedError()

    @classmethod
    def get_type_id(cls) -> EnumT:
        """Return the type-id for this subclass."""
        raise NotImplementedError()

    @classmethod
    def get_type_id_type(cls) -> type[EnumT]:
        """Return the Enum type this class uses as its type-id."""
        # Dig the Enum type out of our Generic[...] base annotation.
        enum_cls: type[EnumT] = cls.__orig_bases__[0].__args__[0]  # type: ignore
        assert issubclass(enum_cls, Enum)
        return enum_cls

    @classmethod
    def get_type_id_storage_name(cls) -> str:
        """Return the key used to store type id in serialized data.

        The default is an obscure value so that it does not conflict
        with members of individual type attrs, but in some cases one
        might prefer to serialize it to something simpler like 'type'
        by overriding this call. One just needs to make sure that no
        encompassed types serialize anything to 'type' themselves.
        """
        return '_dciotype'

    # NOTE: Currently (Jan 2025) mypy complains if overrides annotate
    # return type of 'Self | None'. Substituting their own explicit type
    # works though (see test_dataclassio).
    @classmethod
    def get_unknown_type_fallback(cls) -> Self | None:
        """Return a fallback object in cases of unrecognized types.

        This can allow newer data to remain readable in older
        environments. Use caution with this option, however, as it
        effectively modifies data.
        """
        return None

A base class for types that can map to multiple dataclass types.

This enables usage of high level base classes (for example a 'Message' type) in annotations, with dataclassio automatically serializing & deserializing dataclass subclasses based on their type ('MessagePing', 'MessageChat', etc.)

Standard usage involves creating a class which inherits from this one which acts as a 'registry', and then creating dataclass classes inheriting from that registry class. Dataclassio will then do the right thing when that registry class is used in type annotations.

See tests/test_efro/test_dataclassio.py for examples.

@classmethod
def get_type(cls, type_id: ~EnumT) -> type[typing.Self]:
113    @classmethod
114    def get_type(cls, type_id: EnumT) -> type[Self]:
115        """Return a specific subclass given a type-id."""
116        raise NotImplementedError()

Return a specific subclass given a type-id.

@classmethod
def get_type_id(cls) -> ~EnumT:
118    @classmethod
119    def get_type_id(cls) -> EnumT:
120        """Return the type-id for this subclass."""
121        raise NotImplementedError()

Return the type-id for this subclass.

@classmethod
def get_type_id_type(cls) -> type[~EnumT]:
123    @classmethod
124    def get_type_id_type(cls) -> type[EnumT]:
125        """Return the Enum type this class uses as its type-id."""
126        out: type[EnumT] = cls.__orig_bases__[0].__args__[0]  # type: ignore
127        assert issubclass(out, Enum)
128        return out

Return the Enum type this class uses as its type-id.

@classmethod
def get_type_id_storage_name(cls) -> str:
130    @classmethod
131    def get_type_id_storage_name(cls) -> str:
132        """Return the key used to store type id in serialized data.
133
134        The default is an obscure value so that it does not conflict
135        with members of individual type attrs, but in some cases one
136        might prefer to serialize it to something simpler like 'type' by
137        overriding this call. One just needs to make sure that no
138        encompassed types serialize anything to 'type' themself.
139        """
140        return '_dciotype'

Return the key used to store type id in serialized data.

The default is an obscure value so that it does not conflict with members of individual type attrs, but in some cases one might prefer to serialize it to something simpler like 'type' by overriding this call. One just needs to make sure that no encompassed types serialize anything to 'type' themselves.

@classmethod
def get_unknown_type_fallback(cls) -> Optional[Self]:
145    @classmethod
146    def get_unknown_type_fallback(cls) -> Self | None:
147        """Return a fallback object in cases of unrecognized types.
148
149        This can allow newer data to remain readable in older
150        environments. Use caution with this option, however, as it
151        effectively modifies data.
152        """
153        return None

Return a fallback object in cases of unrecognized types.

This can allow newer data to remain readable in older environments. Use caution with this option, however, as it effectively modifies data.

class JsonStyle(enum.Enum):
28class JsonStyle(Enum):
29    """Different style types for json."""
30
31    #: Single line, no spaces, no sorting. Not deterministic.
32    #: Use this where speed is more important than determinism.
33    FAST = 'fast'
34
35    #: Single line, no spaces, sorted keys. Deterministic.
36    #: Use this when output may be hashed or compared for equality.
37    SORTED = 'sorted'
38
39    #: Multiple lines, spaces, sorted keys. Deterministic.
40    #: Use this for pretty human readable output.
41    PRETTY = 'pretty'

Different style types for json.

FAST = <JsonStyle.FAST: 'fast'>
SORTED = <JsonStyle.SORTED: 'sorted'>
PRETTY = <JsonStyle.PRETTY: 'pretty'>
def dataclass_from_dict( cls: type[~T], values: dict, *, codec: Codec = <Codec.JSON: 'json'>, coerce_to_float: bool = True, allow_unknown_attrs: bool = True, discard_unknown_attrs: bool = False, lossy: bool = False) -> ~T:
100def dataclass_from_dict(
101    cls: type[T],
102    values: dict,
103    *,
104    codec: Codec = Codec.JSON,
105    coerce_to_float: bool = True,
106    allow_unknown_attrs: bool = True,
107    discard_unknown_attrs: bool = False,
108    lossy: bool = False,
109) -> T:
110    """Given a dict, return a dataclass of a given type.
111
112    The dict must be formatted to match the specified codec (generally
113    json-friendly object types). This means that sequence values such as
114    tuples or sets should be passed as lists, enums should be passed as
115    their associated values, nested dataclasses should be passed as
116    dicts, etc.
117
118    All values are checked to ensure their types/values are valid.
119
120    Data for attributes of type Any will be checked to ensure they match
121    types supported directly by json. This does not include types such
122    as tuples which are implicitly translated by Python's json module
123    (as this would break the ability to do a lossless round-trip with
124    data).
125
126    If `coerce_to_float` is True, int values passed for float typed
127    fields will be converted to float values. Otherwise, a TypeError is
128    raised.
129
130    If 'allow_unknown_attrs' is False, AttributeErrors will be raised
131    for attributes present in the dict but not on the data class.
132    Otherwise, they will be preserved as part of the instance and
133    included if it is exported back to a dict, unless
134    `discard_unknown_attrs` is True, in which case they will simply be
135    discarded.
136
137    If `lossy` is True, Enum attrs and IOMultiType types are allowed to
138    use any fallbacks defined for them. This can allow older schemas to
139    successfully load newer data, but this can fundamentally modify the
140    data, so the resulting object is flagged as 'lossy' and prevented
141    from being serialized back out by default.
142    """
143    val = _Inputter(
144        cls,
145        codec=codec,
146        coerce_to_float=coerce_to_float,
147        allow_unknown_attrs=allow_unknown_attrs,
148        discard_unknown_attrs=discard_unknown_attrs,
149        lossy=lossy,
150    ).run(values)
151    assert isinstance(val, cls)
152    return val

Given a dict, return a dataclass of a given type.

The dict must be formatted to match the specified codec (generally json-friendly object types). This means that sequence values such as tuples or sets should be passed as lists, enums should be passed as their associated values, nested dataclasses should be passed as dicts, etc.

All values are checked to ensure their types/values are valid.

Data for attributes of type Any will be checked to ensure they match types supported directly by json. This does not include types such as tuples which are implicitly translated by Python's json module (as this would break the ability to do a lossless round-trip with data).

If coerce_to_float is True, int values passed for float typed fields will be converted to float values. Otherwise, a TypeError is raised.

If 'allow_unknown_attrs' is False, AttributeErrors will be raised for attributes present in the dict but not on the data class. Otherwise, they will be preserved as part of the instance and included if it is exported back to a dict, unless discard_unknown_attrs is True, in which case they will simply be discarded.

If lossy is True, Enum attrs and IOMultiType types are allowed to use any fallbacks defined for them. This can allow older schemas to successfully load newer data, but this can fundamentally modify the data, so the resulting object is flagged as 'lossy' and prevented from being serialized back out by default.

def dataclass_from_json( cls: type[~T], json_str: str, *, coerce_to_float: bool = True, allow_unknown_attrs: bool = True, discard_unknown_attrs: bool = False, lossy: bool = False) -> ~T:
155def dataclass_from_json(
156    cls: type[T],
157    json_str: str,
158    *,
159    coerce_to_float: bool = True,
160    allow_unknown_attrs: bool = True,
161    discard_unknown_attrs: bool = False,
162    lossy: bool = False,
163) -> T:
164    """Return a dataclass instance given a json string.
165
166    Basically dataclass_from_dict(json.loads(...))
167    """
168
169    return dataclass_from_dict(
170        cls=cls,
171        values=json.loads(json_str),
172        coerce_to_float=coerce_to_float,
173        allow_unknown_attrs=allow_unknown_attrs,
174        discard_unknown_attrs=discard_unknown_attrs,
175        lossy=lossy,
176    )

Return a dataclass instance given a json string.

Basically dataclass_from_dict(json.loads(...))

def dataclass_to_dict( obj: Any, codec: Codec = <Codec.JSON: 'json'>, coerce_to_float: bool = True, discard_extra_attrs: bool = False) -> dict:
44def dataclass_to_dict(
45    obj: Any,
46    codec: Codec = Codec.JSON,
47    coerce_to_float: bool = True,
48    discard_extra_attrs: bool = False,
49) -> dict:
50    """Given a dataclass object, return a json-friendly dict.
51
52    All values will be checked to ensure they match the types specified
53    on fields. Note that a limited set of types and data configurations is
54    supported.
55
56    Values with type Any will be checked to ensure they match types supported
57    directly by json. This does not include types such as tuples which are
58    implicitly translated by Python's json module (as this would break
59    the ability to do a lossless round-trip with data).
60
61    If coerce_to_float is True, integer values present on float typed fields
62    will be converted to float in the dict output. If False, a TypeError
63    will be triggered.
64    """
65
66    out = _Outputter(
67        obj,
68        create=True,
69        codec=codec,
70        coerce_to_float=coerce_to_float,
71        discard_extra_attrs=discard_extra_attrs,
72    ).run()
73    assert isinstance(out, dict)
74    return out

Given a dataclass object, return a json-friendly dict.

All values will be checked to ensure they match the types specified on fields. Note that a limited set of types and data configurations is supported.

Values with type Any will be checked to ensure they match types supported directly by json. This does not include types such as tuples which are implicitly translated by Python's json module (as this would break the ability to do a lossless round-trip with data).

If coerce_to_float is True, integer values present on float typed fields will be converted to float in the dict output. If False, a TypeError will be triggered.

def dataclass_to_json( obj: Any, coerce_to_float: bool = True, pretty: bool = False, sort_keys: bool | None = None) -> str:
77def dataclass_to_json(
78    obj: Any,
79    coerce_to_float: bool = True,
80    pretty: bool = False,
81    sort_keys: bool | None = None,
82) -> str:
83    """Utility function; return a json string from a dataclass instance.
84
85    Basically json.dumps(dataclass_to_dict(...)).
86    By default, keys are sorted for pretty output and not otherwise, but
87    this can be overridden by supplying a value for the 'sort_keys' arg.
88    """
89
90    jdict = dataclass_to_dict(
91        obj=obj, coerce_to_float=coerce_to_float, codec=Codec.JSON
92    )
93    if sort_keys is None:
94        sort_keys = pretty
95    if pretty:
96        return json.dumps(jdict, indent=2, sort_keys=sort_keys)
97    return json.dumps(jdict, separators=(',', ':'), sort_keys=sort_keys)

Utility function; return a json string from a dataclass instance.

Basically json.dumps(dataclass_to_dict(...)). By default, keys are sorted for pretty output and not otherwise, but this can be overridden by supplying a value for the 'sort_keys' arg.

def dataclass_validate( obj: Any, coerce_to_float: bool = True, codec: Codec = <Codec.JSON: 'json'>, discard_extra_attrs: bool = False) -> None:
179def dataclass_validate(
180    obj: Any,
181    coerce_to_float: bool = True,
182    codec: Codec = Codec.JSON,
183    discard_extra_attrs: bool = False,
184) -> None:
185    """Ensure that values in a dataclass instance are the correct types."""
186
187    # Simply run an output pass but tell it not to generate data;
188    # only run validation.
189    _Outputter(
190        obj,
191        create=False,
192        codec=codec,
193        coerce_to_float=coerce_to_float,
194        discard_extra_attrs=discard_extra_attrs,
195    ).run()

Ensure that values in a dataclass instance are the correct types.

def dataclass_hash(obj: Any, coerce_to_float: bool = True) -> str:
198def dataclass_hash(obj: Any, coerce_to_float: bool = True) -> str:
199    """Calculate a hash for the provided dataclass.
200
201    Basically this emits json for the dataclass (with keys sorted
202    to keep things deterministic) and hashes the resulting string.
203    """
204    import hashlib
205    from base64 import urlsafe_b64encode
206
207    json_dict = dataclass_to_dict(
208        obj, codec=Codec.JSON, coerce_to_float=coerce_to_float
209    )
210
211    # Need to sort keys to keep things deterministic.
212    json_str = json.dumps(json_dict, separators=(',', ':'), sort_keys=True)
213
214    sha = hashlib.sha256()
215    sha.update(json_str.encode())
216
217    # Go with urlsafe base64 instead of the usual hex to save some
218    # space, and kill those ugly padding chars at the end.
219    return urlsafe_b64encode(sha.digest()).decode().strip('=')

Calculate a hash for the provided dataclass.

Basically this emits json for the dataclass (with keys sorted to keep things deterministic) and hashes the resulting string.

def ioprep(cls: type, globalns: dict | None = None) -> None:
46def ioprep(cls: type, globalns: dict | None = None) -> None:
47    """Prep a dataclass type for use with this module's functionality.
48
49    Prepping ensures that all types contained in a data class as well as
50    the usage of said types are supported by this module and pre-builds
51    necessary constructs needed for encoding/decoding/etc.
52
53    Prepping will happen on-the-fly as needed, but a warning will be
54    emitted in such cases, as it is better to explicitly prep all used types
55    early in a process to ensure any invalid types or configuration are caught
56    immediately.
57
58    Prepping a dataclass involves evaluating its type annotations, which,
59    as of PEP 563, are stored simply as strings. This evaluation is done
60    with localns set to the class dict (so that types defined in the class
61    can be used) and globalns set to the containing module's dict.
62    It is possible to override globalns for special cases such as when
63    prepping happens as part of an execed string instead of within a
64    module.
65    """
66    PrepSession(explicit=True, globalns=globalns).prep_dataclass(
67        cls, recursion_level=0
68    )

Prep a dataclass type for use with this module's functionality.

Prepping ensures that all types contained in a data class as well as the usage of said types are supported by this module and pre-builds necessary constructs needed for encoding/decoding/etc.

Prepping will happen on-the-fly as needed, but a warning will be emitted in such cases, as it is better to explicitly prep all used types early in a process to ensure any invalid types or configuration are caught immediately.

Prepping a dataclass involves evaluating its type annotations, which, as of PEP 563, are stored simply as strings. This evaluation is done with localns set to the class dict (so that types defined in the class can be used) and globalns set to the containing module's dict. It is possible to override globalns for special cases such as when prepping happens as part of an execed string instead of within a module.

def ioprepped(cls: 'type[T]') -> 'type[T]':
71def ioprepped[T](cls: type[T]) -> type[T]:
72    """Class decorator for easily prepping a dataclass at definition time.
73
74    Note that in some cases it may not be possible to prep a dataclass
75    immediately (such as when its type annotations refer to forward-declared
76    types). In these cases, ioprep() should be explicitly called for
77    the class as soon as possible; ideally at module import time to expose any
78    errors as early as possible in execution.
79    """
80    ioprep(cls)
81    return cls

Class decorator for easily prepping a dataclass at definition time.

Note that in some cases it may not be possible to prep a dataclass immediately (such as when its type annotations refer to forward-declared types). In these cases, ioprep() should be explicitly called for the class as soon as possible; ideally at module import time to expose any errors as early as possible in execution.

def is_ioprepped_dataclass(obj: Any) -> bool:
101def is_ioprepped_dataclass(obj: Any) -> bool:
102    """Return whether the obj is an ioprepped dataclass type or instance."""
103    cls = obj if isinstance(obj, type) else type(obj)
104    return dataclasses.is_dataclass(cls) and hasattr(cls, PREP_ATTR)

Return whether the obj is an ioprepped dataclass type or instance.

def will_ioprep(cls: 'type[T]') -> 'type[T]':
84def will_ioprep[T](cls: type[T]) -> type[T]:
85    """Class decorator hinting that we will prep a class later.
86
87    In some cases (such as recursive types) we cannot use the @ioprepped
88    decorator and must instead call ioprep() explicitly later. However,
89    some of our custom pylint checking behaves differently when the
90    @ioprepped decorator is present, in that case requiring type annotations
91    to be present and not simply forward declared under an "if TYPE_CHECKING"
92    block. (since they are used at runtime).
93
94    The @will_ioprep decorator triggers the same pylint behavior
95    differences as @ioprepped (which are necessary for the later ioprep() call
96    to work correctly) but without actually running any prep itself.
97    """
98    return cls

Class decorator hinting that we will prep a class later.

In some cases (such as recursive types) we cannot use the @ioprepped decorator and must instead call ioprep() explicitly later. However, some of our custom pylint checking behaves differently when the @ioprepped decorator is present, in that case requiring type annotations to be present and not simply forward declared under an "if TYPE_CHECKING" block. (since they are used at runtime).

The @will_ioprep decorator triggers the same pylint behavior differences as @ioprepped (which are necessary for the later ioprep() call to work correctly) but without actually running any prep itself.