efro.debug

Utilities for debugging memory leaks or other issues.

IMPORTANT - these functions use the gc module which looks 'under the hood' at Python and sometimes returns not-fully-initialized objects, which may cause crashes or errors due to suddenly having references to them that they didn't expect, etc. See https://github.com/python/cpython/issues/59313. For this reason, these methods should NEVER be called in production code. Enable them only for debugging situations and be aware that their use may itself cause problems. The same is true for the gc module itself.

  1# Released under the MIT License. See LICENSE for details.
  2#
  3"""Utilities for debugging memory leaks or other issues.
  4
  5IMPORTANT - these functions use the gc module which looks 'under the hood'
  6at Python and sometimes returns not-fully-initialized objects, which may
  7cause crashes or errors due to suddenly having references to them that they
  8didn't expect, etc. See https://github.com/python/cpython/issues/59313.
  9For this reason, these methods should NEVER be called in production code.
 10Enable them only for debugging situations and be aware that their use may
 11itself cause problems. The same is true for the gc module itself.
 12"""
 13from __future__ import annotations
 14
 15import gc
 16import sys
 17import types
 18from typing import TYPE_CHECKING
 19
 20if TYPE_CHECKING:
 21    from typing import Any, TextIO
 22
# Hard ceiling on recursion depth in _printrefs(): objects listed in
# 'expand_ids' are expanded past 'max_level', but never at or beyond
# this level.
ABS_MAX_LEVEL = 10
 24
 25# NOTE: In general we want this toolset to allow us to explore
 26# which objects are holding references to others so we can diagnose
 27# leaks/etc. It is a bit tricky to do that, however, without
 28# affecting the objects we are looking at by adding temporary references
 29# from module dicts, function scopes, etc. So we need to try to be
 30# careful about cleaning up after ourselves and explicitly avoiding
 31# returning these temporary references wherever possible.
 32
 33# A good test is running printrefs() repeatedly on some object that is
 34# known to be static. If the list of references or the ids of any of
 35# the listed references change with each run, it's a good sign that
 36# we're showing some temporary objects that we should be ignoring.
 37
 38
 39def getobjs(
 40    cls: type | str, contains: str | None = None, expanded: bool = False
 41) -> list[Any]:
 42    """Return all garbage-collected objects matching criteria.
 43
 44    'type' can be an actual type or a string in which case objects
 45    whose types contain that string will be returned.
 46
 47    If 'contains' is provided, objects will be filtered to those
 48    containing that in their str() representations.
 49    """
 50
 51    # Don't wanna return stuff waiting to be garbage-collected.
 52    gc.collect()
 53
 54    if not isinstance(cls, type | str):
 55        raise TypeError('Expected a type or string for cls')
 56    if not isinstance(contains, str | None):
 57        raise TypeError('Expected a string or None for contains')
 58
 59    allobjs = _get_all_objects(expanded=expanded)
 60
 61    if isinstance(cls, str):
 62        objs = [o for o in allobjs if cls in str(type(o))]
 63    else:
 64        objs = [o for o in allobjs if isinstance(o, cls)]
 65    if contains is not None:
 66        objs = [o for o in objs if contains in str(o)]
 67
 68    return objs
 69
 70
 71# Recursively expand slists objects into olist, using seen to track
 72# already processed objects.
 73def _getr(slist: list[Any], olist: list[Any], seen: set[int]) -> None:
 74    for obj in slist:
 75        if id(obj) in seen:
 76            continue
 77        seen.add(id(obj))
 78        olist.append(obj)
 79        tll = gc.get_referents(obj)
 80        if tll:
 81            _getr(tll, olist, seen)
 82
 83
 84def _get_all_objects(expanded: bool) -> list[Any]:
 85    """Return an expanded list of all objects.
 86
 87    See https://utcc.utoronto.ca/~cks/space/blog/python/GetAllObjects
 88    """
 89    gcl = gc.get_objects()
 90    if not expanded:
 91        return gcl
 92    olist: list[Any] = []
 93    seen: set[int] = set()
 94    # Just in case:
 95    seen.add(id(gcl))
 96    seen.add(id(olist))
 97    seen.add(id(seen))
 98    # _getr does the real work.
 99    _getr(gcl, olist, seen)
100    return olist
101
102
def getobj(objid: int, expanded: bool = False) -> Any:
    """Look up a garbage-collected object by its id and return it.

    This scans every object the collector knows about, so it is VERY
    inefficient and should only ever be used for debugging.

    Raises TypeError if 'objid' is not an int and RuntimeError if no
    object with that id is found.
    """
    if not isinstance(objid, int):
        raise TypeError(f'Expected an int for objid; got a {type(objid)}.')

    # Don't wanna return stuff waiting to be garbage-collected.
    gc.collect()

    # Private sentinel so that any real object (None included) can be a
    # valid search result.
    missing = object()
    match = next(
        (
            candidate
            for candidate in _get_all_objects(expanded=expanded)
            if id(candidate) == objid
        ),
        missing,
    )
    if match is missing:
        raise RuntimeError(f'Object with id {objid} not found.')
    return match
120
121
def getrefs(obj: Any) -> list[Any]:
    """Return the objects that currently hold a reference to 'obj'.

    This function's own locals mapping is filtered out of the result.
    """
    own_locals = vars()  # Ignore ref coming from locals.
    referrers = []
    for referrer in gc.get_referrers(obj):
        if referrer is not own_locals:
            referrers.append(referrer)
    return referrers
126
127
def printfiles(file: TextIO | None = None) -> None:
    """Print info about open files in the current app.

    Lists every regular file the process has open according to psutil
    (not just files opened through Python) and, where a live io.FileIO
    or io.TextIOWrapper object shares the same descriptor, the id of
    that Python object.

    Args:
        file: Stream to write the report to; defaults to sys.stderr.
            If the optional 'psutil' module is missing, a one-line
            error is written to this stream and nothing else happens.
    """
    import io

    file = sys.stderr if file is None else file
    try:
        import psutil
    except ImportError:
        print(
            "Error: printfiles requires the 'psutil' module to be installed.",
            file=file,
        )
        return

    proc = psutil.Process()

    # Let's grab all Python file handles so we can associate raw files
    # with their Python objects when possible.
    fileio_ids: dict[int, Any] = {}
    textio_ids: dict[int, Any] = {}
    for registry, iocls in (
        (fileio_ids, io.FileIO),
        (textio_ids, io.TextIOWrapper),
    ):
        for obj in getobjs(iocls):
            try:
                registry[obj.fileno()] = obj
            except (OSError, ValueError):
                # Closed files and wrappers around streams without a
                # real descriptor can't be mapped; skip them rather
                # than aborting the whole report.
                continue

    # FIXME: we could do a more limited version of this when psutil is
    # not present that simply includes Python's files.
    print('Files open by this app (not limited to Python\'s):', file=file)
    for i, ofile in enumerate(proc.open_files()):
        # Mypy doesn't know about mode apparently.
        # (and can't use type: ignore because we don't require psutil
        # and then mypy complains about unused ignore comment when its
        # not present)
        # psutil only supplies 'mode' on some platforms, so fall back
        # to a placeholder instead of raising AttributeError.
        mode = getattr(ofile, 'mode', '<unavailable>')
        assert isinstance(mode, str)
        textio = textio_ids.get(ofile.fd)
        textio_s = id(textio) if textio is not None else '<not found>'
        fileio = fileio_ids.get(ofile.fd)
        fileio_s = id(fileio) if fileio is not None else '<not found>'
        # Send rows to the same stream as the header (previously these
        # went to stdout regardless of 'file').
        print(
            f'#{i+1}: path={ofile.path!r},'
            f' fd={ofile.fd}, mode={mode!r}, TextIOWrapper={textio_s},'
            f' FileIO={fileio_s}',
            file=file,
        )
168
169
def printrefs(
    obj: Any,
    max_level: int = 2,
    exclude_objs: list[Any] | None = None,
    expand_ids: list[int] | None = None,
    file: TextIO | None = None,
) -> None:
    """Print a human readable tree of the objects referring to 'obj'.

    'max_level' is the number of levels of referrers to recurse into.
    'exclude_objs' is an optional list of exact objects to leave out
      when they show up as referrers. Handy for hiding the local
      context the object was passed in from (locals(), etc).
    'expand_ids' is an optional list of object ids; an object whose id
      is listed gets expanded even once max_level has been reached.
    'file' is the stream to print to; sys.stderr is used if omitted.
    """
    # Fill in defaults for anything the caller left out.
    if exclude_objs is None:
        exclude_objs = []
    if expand_ids is None:
        expand_ids = []
    if file is None:
        file = sys.stderr

    _printrefs(
        obj,
        level=0,
        max_level=max_level,
        exclude_objs=exclude_objs,
        expand_ids=expand_ids,
        file=file,
    )
194
195
def printtypes(
    limit: int = 50, file: TextIO | None = None, expanded: bool = False
) -> None:
    """Print a ranked list of the types with the most live instances.

    At most 'limit' types are shown, most numerous first, each with its
    instance count and share of all objects examined. 'expanded' is
    passed through to _get_all_objects().
    """
    assert limit > 0
    counts: dict[str, int] = {}
    gc.collect()  # Recommended before get_objects().
    allobjs = _get_all_objects(expanded=expanded)
    total = len(allobjs)
    for item in allobjs:
        module = type(item).__module__
        label = type(item).__qualname__
        # Builtin types are shown bare; everything else gets qualified
        # with its module name.
        if module != 'builtins':
            label = f'{module}.{label}'
        counts[label] = counts.get(label, 0) + 1

    # Presumably allobjs contains stack-frame/dict type stuff
    # from this function call which in turn contain refs to allobjs.
    # Let's try to prevent these huge lists from accumulating until
    # the cyclical collector (hopefully) gets to them.
    allobjs.clear()
    del allobjs

    print(f'Types most allocated ({total} total objects):', file=file)
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    for rank, (label, count) in enumerate(ranked[:limit], start=1):
        percent = count / total * 100.0
        print(f'{rank}: {label}: {count} ({percent:.2f}%)', file=file)
226
227
def printsizes(
    limit: int = 50, file: TextIO | None = None, expanded: bool = False
) -> None:
    """Print total allocated sizes of different types.

    Sums sys.getsizeof() per type over all objects examined and prints
    the top 'limit' types by total bytes, each with its share of the
    grand total. Note that sys.getsizeof() is shallow: it does not
    include the sizes of objects a container refers to.

    Args:
        limit: Maximum number of types to list; must be positive.
        file: Stream passed to print(); None means print()'s default.
        expanded: Passed through to _get_all_objects().
    """
    assert limit > 0
    objsizes: dict[str, int] = {}
    gc.collect()  # Recommended before get_objects().
    allobjs = _get_all_objects(expanded=expanded)
    totalobjsize = 0

    for obj in allobjs:
        modname = type(obj).__module__
        tpname = type(obj).__qualname__
        if modname != 'builtins':
            tpname = f'{modname}.{tpname}'
        objsize = sys.getsizeof(obj)
        objsizes[tpname] = objsizes.get(tpname, 0) + objsize
        totalobjsize += objsize

    # As in printtypes(): allobjs presumably contains stack-frame/dict
    # type stuff from this call which in turn refers back to allobjs.
    # Empty and drop the huge list now rather than leaving it around
    # until the cyclical collector (hopefully) gets to it.
    allobjs.clear()
    del allobjs

    totalobjmb = totalobjsize / (1024 * 1024)
    print(
        f'Types with most allocated bytes ({totalobjmb:.2f} mb total):',
        file=file,
    )
    for i, tpitem in enumerate(
        sorted(objsizes.items(), key=lambda x: x[1], reverse=True)[:limit]
    ):
        tpname, tpval = tpitem
        percent = tpval / totalobjsize * 100.0
        print(f'{i+1}: {tpname}: {tpval} ({percent:.2f}%)', file=file)
258
259
260def _desctype(obj: Any) -> str:
261    cls = type(obj)
262    # noinspection PyPep8
263    if cls is types.ModuleType:
264        return f'{type(obj).__name__} {obj.__name__}'
265    # noinspection PyPep8
266    if cls is types.MethodType:
267        bnd = 'bound' if hasattr(obj, '__self__') else 'unbound'
268        return f'{bnd} {type(obj).__name__} {obj.__name__}'
269    return f'{type(obj).__name__}'
270
271
def _desc(obj: Any) -> str:
    """Return a one-line description of 'obj' for reference listings.

    The result is '<type label> @ <id>' plus, for lists, tuples and
    dicts, a parenthesized suffix: either the length with a peek at the
    first few entries or, for a dict that is some object's vars(), a
    note saying which object it belongs to.
    """
    suffix = ''
    if isinstance(obj, list | tuple):
        # Length plus the types of the first few members.
        kinds = [_desctype(member) for member in obj[:3]]
        joined = ', '.join(kinds)
        if len(obj) > 3:
            peek = f', contains [{joined}, ...]'
        elif kinds:
            peek = f', contains [{joined}]'
        else:
            peek = ''
        suffix = f' (len {len(obj)}{peek})'
    elif isinstance(obj, dict):
        # If it seems to be the vars() for a type or module,
        # try to identify what. (No early exit: if several referrers
        # match, the last one examined is the one reported.)
        owner: str | None = None
        for referrer in getrefs(obj):
            if hasattr(referrer, '__dict__') and vars(referrer) is obj:
                owner = f' (vars for {_desctype(referrer)} @ {id(referrer)})'

        if owner is not None:
            suffix = owner
        else:
            # Generic dict: length plus the first few key:type pairs.
            entries = [
                f'{repr(key)}: {_desctype(val)}'
                for key, val in list(obj.items())[:3]
            ]
            joined = ', '.join(entries)
            if len(obj) > 3:
                peek = f', contains {{{joined}, ...}}'
            elif entries:
                peek = f', contains {{{joined}}}'
            else:
                peek = ''
            suffix = f' (len {len(obj)}{peek})'
    return f'{_desctype(obj)} @ {id(obj)}{suffix}'
310
311
def _printrefs(
    obj: Any,
    level: int,
    max_level: int,
    exclude_objs: list,
    expand_ids: list[int],
    file: TextIO,
) -> None:
    """Print 'obj' at the given indent level, then recurse into referrers.

    Referrers are expanded while 'level' is below 'max_level', or, for
    objects whose id is in 'expand_ids', while it is below
    ABS_MAX_LEVEL. Referrers that are artifacts of this walk itself
    (transient cells, our locals, the referrer lists we build) and
    anything in 'exclude_objs' are skipped.
    """
    print('  ' * level + _desc(obj), file=file)
    own_locals = vars()

    # Stop here unless we are still within the normal depth or this
    # object was explicitly requested to be expanded.
    if level >= max_level and not (
        id(obj) in expand_ids and level < ABS_MAX_LEVEL
    ):
        return

    referrers = getrefs(obj)
    for referrer in referrers:
        # It seems we tend to get a transient cell object with contents
        # set to obj. Would be nice to understand why that happens
        # but just ignoring it for now.
        if (
            isinstance(referrer, types.CellType)
            and referrer.cell_contents is obj
        ):
            continue

        # Ignore anything we were asked to ignore.
        if exclude_objs is not None and any(
            referrer is skipped for skipped in exclude_objs
        ):
            continue

        # Ignore references from our locals.
        if referrer is own_locals:
            continue

        # The 'referrers' list we just made will be listed as a referrer
        # of this obj, so explicitly exclude it from the obj's listing.
        _printrefs(
            referrer,
            level=level + 1,
            max_level=max_level,
            exclude_objs=exclude_objs + [referrers],
            expand_ids=expand_ids,
            file=file,
        )
ABS_MAX_LEVEL = 10
def getobjs( cls: type | str, contains: str | None = None, expanded: bool = False) -> list[typing.Any]:
40def getobjs(
41    cls: type | str, contains: str | None = None, expanded: bool = False
42) -> list[Any]:
43    """Return all garbage-collected objects matching criteria.
44
45    'type' can be an actual type or a string in which case objects
46    whose types contain that string will be returned.
47
48    If 'contains' is provided, objects will be filtered to those
49    containing that in their str() representations.
50    """
51
52    # Don't wanna return stuff waiting to be garbage-collected.
53    gc.collect()
54
55    if not isinstance(cls, type | str):
56        raise TypeError('Expected a type or string for cls')
57    if not isinstance(contains, str | None):
58        raise TypeError('Expected a string or None for contains')
59
60    allobjs = _get_all_objects(expanded=expanded)
61
62    if isinstance(cls, str):
63        objs = [o for o in allobjs if cls in str(type(o))]
64    else:
65        objs = [o for o in allobjs if isinstance(o, cls)]
66    if contains is not None:
67        objs = [o for o in objs if contains in str(o)]
68
69    return objs

Return all garbage-collected objects matching criteria.

'cls' can be an actual type or a string; if it is a string, objects whose type's string representation contains that string will be returned.

If 'contains' is provided, objects will be filtered to those containing that in their str() representations.

def getobj(objid: int, expanded: bool = False) -> Any:
def getobj(objid: int, expanded: bool = False) -> Any:
    """Look up a garbage-collected object by its id and return it.

    This scans every object the collector knows about, so it is VERY
    inefficient and should only ever be used for debugging.

    Raises TypeError if 'objid' is not an int and RuntimeError if no
    object with that id is found.
    """
    if not isinstance(objid, int):
        raise TypeError(f'Expected an int for objid; got a {type(objid)}.')

    # Don't wanna return stuff waiting to be garbage-collected.
    gc.collect()

    # Private sentinel so that any real object (None included) can be a
    # valid search result.
    missing = object()
    match = next(
        (
            candidate
            for candidate in _get_all_objects(expanded=expanded)
            if id(candidate) == objid
        ),
        missing,
    )
    if match is missing:
        raise RuntimeError(f'Object with id {objid} not found.')
    return match

Return a garbage-collected object by its id.

Remember that this is VERY inefficient and should only ever be used for debugging.

def getrefs(obj: Any) -> list[typing.Any]:
def getrefs(obj: Any) -> list[Any]:
    """Return the objects that currently hold a reference to 'obj'.

    This function's own locals mapping is filtered out of the result.
    """
    own_locals = vars()  # Ignore ref coming from locals.
    referrers = []
    for referrer in gc.get_referrers(obj):
        if referrer is not own_locals:
            referrers.append(referrer)
    return referrers

Given an object, return things referencing it.

def printfiles(file: typing.TextIO | None = None) -> None:
def printfiles(file: TextIO | None = None) -> None:
    """Print info about open files in the current app.

    Lists every regular file the process has open according to psutil
    (not just files opened through Python) and, where a live io.FileIO
    or io.TextIOWrapper object shares the same descriptor, the id of
    that Python object.

    Args:
        file: Stream to write the report to; defaults to sys.stderr.
            If the optional 'psutil' module is missing, a one-line
            error is written to this stream and nothing else happens.
    """
    import io

    file = sys.stderr if file is None else file
    try:
        import psutil
    except ImportError:
        print(
            "Error: printfiles requires the 'psutil' module to be installed.",
            file=file,
        )
        return

    proc = psutil.Process()

    # Let's grab all Python file handles so we can associate raw files
    # with their Python objects when possible.
    fileio_ids: dict[int, Any] = {}
    textio_ids: dict[int, Any] = {}
    for registry, iocls in (
        (fileio_ids, io.FileIO),
        (textio_ids, io.TextIOWrapper),
    ):
        for obj in getobjs(iocls):
            try:
                registry[obj.fileno()] = obj
            except (OSError, ValueError):
                # Closed files and wrappers around streams without a
                # real descriptor can't be mapped; skip them rather
                # than aborting the whole report.
                continue

    # FIXME: we could do a more limited version of this when psutil is
    # not present that simply includes Python's files.
    print('Files open by this app (not limited to Python\'s):', file=file)
    for i, ofile in enumerate(proc.open_files()):
        # Mypy doesn't know about mode apparently.
        # (and can't use type: ignore because we don't require psutil
        # and then mypy complains about unused ignore comment when its
        # not present)
        # psutil only supplies 'mode' on some platforms, so fall back
        # to a placeholder instead of raising AttributeError.
        mode = getattr(ofile, 'mode', '<unavailable>')
        assert isinstance(mode, str)
        textio = textio_ids.get(ofile.fd)
        textio_s = id(textio) if textio is not None else '<not found>'
        fileio = fileio_ids.get(ofile.fd)
        fileio_s = id(fileio) if fileio is not None else '<not found>'
        # Send rows to the same stream as the header (previously these
        # went to stdout regardless of 'file').
        print(
            f'#{i+1}: path={ofile.path!r},'
            f' fd={ofile.fd}, mode={mode!r}, TextIOWrapper={textio_s},'
            f' FileIO={fileio_s}',
            file=file,
        )

Print info about open files in the current app.

def printrefs( obj: Any, max_level: int = 2, exclude_objs: list[typing.Any] | None = None, expand_ids: list[int] | None = None, file: typing.TextIO | None = None) -> None:
def printrefs(
    obj: Any,
    max_level: int = 2,
    exclude_objs: list[Any] | None = None,
    expand_ids: list[int] | None = None,
    file: TextIO | None = None,
) -> None:
    """Print a human readable tree of the objects referring to 'obj'.

    'max_level' is the number of levels of referrers to recurse into.
    'exclude_objs' is an optional list of exact objects to leave out
      when they show up as referrers. Handy for hiding the local
      context the object was passed in from (locals(), etc).
    'expand_ids' is an optional list of object ids; an object whose id
      is listed gets expanded even once max_level has been reached.
    'file' is the stream to print to; sys.stderr is used if omitted.
    """
    # Fill in defaults for anything the caller left out.
    if exclude_objs is None:
        exclude_objs = []
    if expand_ids is None:
        expand_ids = []
    if file is None:
        file = sys.stderr

    _printrefs(
        obj,
        level=0,
        max_level=max_level,
        exclude_objs=exclude_objs,
        expand_ids=expand_ids,
        file=file,
    )

Print human readable list of objects referring to an object.

'max_level' specifies how many levels of recursion are printed. 'exclude_objs' can be a list of exact objects to skip if found in the referrers list. This can be useful to avoid printing the local context where the object was passed in from (locals(), etc). 'expand_ids' can be a list of object ids; if that particular object is found, it will always be expanded even if max_level has been reached.

def printtypes( limit: int = 50, file: typing.TextIO | None = None, expanded: bool = False) -> None:
def printtypes(
    limit: int = 50, file: TextIO | None = None, expanded: bool = False
) -> None:
    """Print a ranked list of the types with the most live instances.

    At most 'limit' types are shown, most numerous first, each with its
    instance count and share of all objects examined. 'expanded' is
    passed through to _get_all_objects().
    """
    assert limit > 0
    counts: dict[str, int] = {}
    gc.collect()  # Recommended before get_objects().
    allobjs = _get_all_objects(expanded=expanded)
    total = len(allobjs)
    for item in allobjs:
        module = type(item).__module__
        label = type(item).__qualname__
        # Builtin types are shown bare; everything else gets qualified
        # with its module name.
        if module != 'builtins':
            label = f'{module}.{label}'
        counts[label] = counts.get(label, 0) + 1

    # Presumably allobjs contains stack-frame/dict type stuff
    # from this function call which in turn contain refs to allobjs.
    # Let's try to prevent these huge lists from accumulating until
    # the cyclical collector (hopefully) gets to them.
    allobjs.clear()
    del allobjs

    print(f'Types most allocated ({total} total objects):', file=file)
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    for rank, (label, count) in enumerate(ranked[:limit], start=1):
        percent = count / total * 100.0
        print(f'{rank}: {label}: {count} ({percent:.2f}%)', file=file)

Print a human readable list of which types have the most instances.

def printsizes( limit: int = 50, file: typing.TextIO | None = None, expanded: bool = False) -> None:
def printsizes(
    limit: int = 50, file: TextIO | None = None, expanded: bool = False
) -> None:
    """Print total allocated sizes of different types.

    Sums sys.getsizeof() per type over all objects examined and prints
    the top 'limit' types by total bytes, each with its share of the
    grand total. Note that sys.getsizeof() is shallow: it does not
    include the sizes of objects a container refers to.

    Args:
        limit: Maximum number of types to list; must be positive.
        file: Stream passed to print(); None means print()'s default.
        expanded: Passed through to _get_all_objects().
    """
    assert limit > 0
    objsizes: dict[str, int] = {}
    gc.collect()  # Recommended before get_objects().
    allobjs = _get_all_objects(expanded=expanded)
    totalobjsize = 0

    for obj in allobjs:
        modname = type(obj).__module__
        tpname = type(obj).__qualname__
        if modname != 'builtins':
            tpname = f'{modname}.{tpname}'
        objsize = sys.getsizeof(obj)
        objsizes[tpname] = objsizes.get(tpname, 0) + objsize
        totalobjsize += objsize

    # As in printtypes(): allobjs presumably contains stack-frame/dict
    # type stuff from this call which in turn refers back to allobjs.
    # Empty and drop the huge list now rather than leaving it around
    # until the cyclical collector (hopefully) gets to it.
    allobjs.clear()
    del allobjs

    totalobjmb = totalobjsize / (1024 * 1024)
    print(
        f'Types with most allocated bytes ({totalobjmb:.2f} mb total):',
        file=file,
    )
    for i, tpitem in enumerate(
        sorted(objsizes.items(), key=lambda x: x[1], reverse=True)[:limit]
    ):
        tpname, tpval = tpitem
        percent = tpval / totalobjsize * 100.0
        print(f'{i+1}: {tpname}: {tpval} ({percent:.2f}%)', file=file)

Print total allocated sizes of different types.