efro.debug
Utilities for debugging memory leaks or other issues.
IMPORTANT - these functions use the gc module which looks 'under the hood' at Python and sometimes returns not-fully-initialized objects, which may cause crashes or errors due to suddenly having references to them that they didn't expect, etc. See https://github.com/python/cpython/issues/59313. For this reason, these methods should NEVER be called in production code. Enable them only for debugging situations and be aware that their use may itself cause problems. The same is true for the gc module itself.
1# Released under the MIT License. See LICENSE for details. 2# 3"""Utilities for debugging memory leaks or other issues. 4 5IMPORTANT - these functions use the gc module which looks 'under the hood' 6at Python and sometimes returns not-fully-initialized objects, which may 7cause crashes or errors due to suddenly having references to them that they 8didn't expect, etc. See https://github.com/python/cpython/issues/59313. 9For this reason, these methods should NEVER be called in production code. 10Enable them only for debugging situations and be aware that their use may 11itself cause problems. The same is true for the gc module itself. 12""" 13from __future__ import annotations 14 15import gc 16import sys 17import types 18from typing import TYPE_CHECKING 19 20if TYPE_CHECKING: 21 from typing import Any, TextIO 22 23ABS_MAX_LEVEL = 10 24 25# NOTE: In general we want this toolset to allow us to explore 26# which objects are holding references to others so we can diagnose 27# leaks/etc. It is a bit tricky to do that, however, without 28# affecting the objects we are looking at by adding temporary references 29# from module dicts, function scopes, etc. So we need to try to be 30# careful about cleaning up after ourselves and explicitly avoiding 31# returning these temporary references wherever possible. 32 33# A good test is running printrefs() repeatedly on some object that is 34# known to be static. If the list of references or the ids or any 35# the listed references changes with each run, it's a good sign that 36# we're showing some temporary objects that we should be ignoring. 37 38 39def getobjs( 40 cls: type | str, contains: str | None = None, expanded: bool = False 41) -> list[Any]: 42 """Return all garbage-collected objects matching criteria. 43 44 'type' can be an actual type or a string in which case objects 45 whose types contain that string will be returned. 
46 47 If 'contains' is provided, objects will be filtered to those 48 containing that in their str() representations. 49 """ 50 51 # Don't wanna return stuff waiting to be garbage-collected. 52 gc.collect() 53 54 if not isinstance(cls, type | str): 55 raise TypeError('Expected a type or string for cls') 56 if not isinstance(contains, str | None): 57 raise TypeError('Expected a string or None for contains') 58 59 allobjs = _get_all_objects(expanded=expanded) 60 61 if isinstance(cls, str): 62 objs = [o for o in allobjs if cls in str(type(o))] 63 else: 64 objs = [o for o in allobjs if isinstance(o, cls)] 65 if contains is not None: 66 objs = [o for o in objs if contains in str(o)] 67 68 return objs 69 70 71# Recursively expand slists objects into olist, using seen to track 72# already processed objects. 73def _getr(slist: list[Any], olist: list[Any], seen: set[int]) -> None: 74 for obj in slist: 75 if id(obj) in seen: 76 continue 77 seen.add(id(obj)) 78 olist.append(obj) 79 tll = gc.get_referents(obj) 80 if tll: 81 _getr(tll, olist, seen) 82 83 84def _get_all_objects(expanded: bool) -> list[Any]: 85 """Return an expanded list of all objects. 86 87 See https://utcc.utoronto.ca/~cks/space/blog/python/GetAllObjects 88 """ 89 gcl = gc.get_objects() 90 if not expanded: 91 return gcl 92 olist: list[Any] = [] 93 seen: set[int] = set() 94 # Just in case: 95 seen.add(id(gcl)) 96 seen.add(id(olist)) 97 seen.add(id(seen)) 98 # _getr does the real work. 99 _getr(gcl, olist, seen) 100 return olist 101 102 103def getobj(objid: int, expanded: bool = False) -> Any: 104 """Return a garbage-collected object by its id. 105 106 Remember that this is VERY inefficient and should only ever be used 107 for debugging. 108 """ 109 if not isinstance(objid, int): 110 raise TypeError(f'Expected an int for objid; got a {type(objid)}.') 111 112 # Don't wanna return stuff waiting to be garbage-collected. 
113 gc.collect() 114 115 allobjs = _get_all_objects(expanded=expanded) 116 for obj in allobjs: 117 if id(obj) == objid: 118 return obj 119 raise RuntimeError(f'Object with id {objid} not found.') 120 121 122def getrefs(obj: Any) -> list[Any]: 123 """Given an object, return things referencing it.""" 124 v = vars() # Ignore ref coming from locals. 125 return [o for o in gc.get_referrers(obj) if o is not v] 126 127 128def printfiles(file: TextIO | None = None) -> None: 129 """Print info about open files in the current app.""" 130 import io 131 132 file = sys.stderr if file is None else file 133 try: 134 import psutil 135 except ImportError: 136 print( 137 "Error: printfiles requires the 'psutil' module to be installed.", 138 file=file, 139 ) 140 return 141 142 proc = psutil.Process() 143 144 # Let's grab all Python file handles so we can associate raw files 145 # with their Python objects when possible. 146 fileio_ids = {obj.fileno(): obj for obj in getobjs(io.FileIO)} 147 textio_ids = {obj.fileno(): obj for obj in getobjs(io.TextIOWrapper)} 148 149 # FIXME: we could do a more limited version of this when psutil is 150 # not present that simply includes Python's files. 151 print('Files open by this app (not limited to Python\'s):', file=file) 152 for i, ofile in enumerate(proc.open_files()): 153 # Mypy doesn't know about mode apparently. 
154 # (and can't use type: ignore because we don't require psutil 155 # and then mypy complains about unused ignore comment when its 156 # not present) 157 mode = getattr(ofile, 'mode') 158 assert isinstance(mode, str) 159 textio = textio_ids.get(ofile.fd) 160 textio_s = id(textio) if textio is not None else '<not found>' 161 fileio = fileio_ids.get(ofile.fd) 162 fileio_s = id(fileio) if fileio is not None else '<not found>' 163 print( 164 f'#{i+1}: path={ofile.path!r},' 165 f' fd={ofile.fd}, mode={mode!r}, TextIOWrapper={textio_s},' 166 f' FileIO={fileio_s}' 167 ) 168 169 170def printrefs( 171 obj: Any, 172 max_level: int = 2, 173 exclude_objs: list[Any] | None = None, 174 expand_ids: list[int] | None = None, 175 file: TextIO | None = None, 176) -> None: 177 """Print human readable list of objects referring to an object. 178 179 'max_level' specifies how many levels of recursion are printed. 180 'exclude_objs' can be a list of exact objects to skip if found in the 181 referrers list. This can be useful to avoid printing the local context 182 where the object was passed in from (locals(), etc). 183 'expand_ids' can be a list of object ids; if that particular object is 184 found, it will always be expanded even if max_level has been reached. 185 """ 186 _printrefs( 187 obj, 188 level=0, 189 max_level=max_level, 190 exclude_objs=[] if exclude_objs is None else exclude_objs, 191 expand_ids=[] if expand_ids is None else expand_ids, 192 file=sys.stderr if file is None else file, 193 ) 194 195 196def printtypes( 197 limit: int = 50, file: TextIO | None = None, expanded: bool = False 198) -> None: 199 """Print a human readable list of which types have the most instances.""" 200 assert limit > 0 201 objtypes: dict[str, int] = {} 202 gc.collect() # Recommended before get_objects(). 
203 allobjs = _get_all_objects(expanded=expanded) 204 allobjc = len(allobjs) 205 for obj in allobjs: 206 modname = type(obj).__module__ 207 tpname = type(obj).__qualname__ 208 if modname != 'builtins': 209 tpname = f'{modname}.{tpname}' 210 objtypes[tpname] = objtypes.get(tpname, 0) + 1 211 212 # Presumably allobjs contains stack-frame/dict type stuff 213 # from this function call which in turn contain refs to allobjs. 214 # Let's try to prevent these huge lists from accumulating until 215 # the cyclical collector (hopefully) gets to them. 216 allobjs.clear() 217 del allobjs 218 219 print(f'Types most allocated ({allobjc} total objects):', file=file) 220 for i, tpitem in enumerate( 221 sorted(objtypes.items(), key=lambda x: x[1], reverse=True)[:limit] 222 ): 223 tpname, tpval = tpitem 224 percent = tpval / allobjc * 100.0 225 print(f'{i+1}: {tpname}: {tpval} ({percent:.2f}%)', file=file) 226 227 228def printsizes( 229 limit: int = 50, file: TextIO | None = None, expanded: bool = False 230) -> None: 231 """Print total allocated sizes of different types.""" 232 assert limit > 0 233 objsizes: dict[str, int] = {} 234 gc.collect() # Recommended before get_objects(). 
235 allobjs = _get_all_objects(expanded=expanded) 236 totalobjsize = 0 237 238 for obj in allobjs: 239 modname = type(obj).__module__ 240 tpname = type(obj).__qualname__ 241 if modname != 'builtins': 242 tpname = f'{modname}.{tpname}' 243 objsize = sys.getsizeof(obj) 244 objsizes[tpname] = objsizes.get(tpname, 0) + objsize 245 totalobjsize += objsize 246 247 totalobjmb = totalobjsize / (1024 * 1024) 248 print( 249 f'Types with most allocated bytes ({totalobjmb:.2f} mb total):', 250 file=file, 251 ) 252 for i, tpitem in enumerate( 253 sorted(objsizes.items(), key=lambda x: x[1], reverse=True)[:limit] 254 ): 255 tpname, tpval = tpitem 256 percent = tpval / totalobjsize * 100.0 257 print(f'{i+1}: {tpname}: {tpval} ({percent:.2f}%)', file=file) 258 259 260def _desctype(obj: Any) -> str: 261 cls = type(obj) 262 # noinspection PyPep8 263 if cls is types.ModuleType: 264 return f'{type(obj).__name__} {obj.__name__}' 265 # noinspection PyPep8 266 if cls is types.MethodType: 267 bnd = 'bound' if hasattr(obj, '__self__') else 'unbound' 268 return f'{bnd} {type(obj).__name__} {obj.__name__}' 269 return f'{type(obj).__name__}' 270 271 272def _desc(obj: Any) -> str: 273 extra: str | None = None 274 if isinstance(obj, list | tuple): 275 # Print length and the first few types. 276 tps = [_desctype(i) for i in obj[:3]] 277 tpsj = ', '.join(tps) 278 tpss = ( 279 f', contains [{tpsj}, ...]' 280 if len(obj) > 3 281 else f', contains [{tpsj}]' 282 if tps 283 else '' 284 ) 285 extra = f' (len {len(obj)}{tpss})' 286 elif isinstance(obj, dict): 287 # If it seems to be the vars() for a type or module, 288 # try to identify what. 289 for ref in getrefs(obj): 290 if hasattr(ref, '__dict__') and vars(ref) is obj: 291 extra = f' (vars for {_desctype(ref)} @ {id(ref)})' 292 293 # Generic dict: print length and the first few key:type pairs. 
294 if extra is None: 295 pairs = [ 296 f'{repr(n)}: {_desctype(v)}' for n, v in list(obj.items())[:3] 297 ] 298 pairsj = ', '.join(pairs) 299 pairss = ( 300 f', contains {{{pairsj}, ...}}' 301 if len(obj) > 3 302 else f', contains {{{pairsj}}}' 303 if pairs 304 else '' 305 ) 306 extra = f' (len {len(obj)}{pairss})' 307 if extra is None: 308 extra = '' 309 return f'{_desctype(obj)} @ {id(obj)}{extra}' 310 311 312def _printrefs( 313 obj: Any, 314 level: int, 315 max_level: int, 316 exclude_objs: list, 317 expand_ids: list[int], 318 file: TextIO, 319) -> None: 320 ind = ' ' * level 321 print(ind + _desc(obj), file=file) 322 v = vars() 323 if level < max_level or (id(obj) in expand_ids and level < ABS_MAX_LEVEL): 324 refs = getrefs(obj) 325 for ref in refs: 326 # It seems we tend to get a transient cell object with contents 327 # set to obj. Would be nice to understand why that happens 328 # but just ignoring it for now. 329 if isinstance(ref, types.CellType) and ref.cell_contents is obj: 330 continue 331 332 # Ignore anything we were asked to ignore. 333 if exclude_objs is not None: 334 if any(ref is eobj for eobj in exclude_objs): 335 continue 336 337 # Ignore references from our locals. 338 if ref is v: 339 continue 340 341 # The 'refs' list we just made will be listed as a referrer 342 # of this obj, so explicitly exclude it from the obj's listing. 343 _printrefs( 344 ref, 345 level=level + 1, 346 max_level=max_level, 347 exclude_objs=exclude_objs + [refs], 348 expand_ids=expand_ids, 349 file=file, 350 )
40def getobjs( 41 cls: type | str, contains: str | None = None, expanded: bool = False 42) -> list[Any]: 43 """Return all garbage-collected objects matching criteria. 44 45 'type' can be an actual type or a string in which case objects 46 whose types contain that string will be returned. 47 48 If 'contains' is provided, objects will be filtered to those 49 containing that in their str() representations. 50 """ 51 52 # Don't wanna return stuff waiting to be garbage-collected. 53 gc.collect() 54 55 if not isinstance(cls, type | str): 56 raise TypeError('Expected a type or string for cls') 57 if not isinstance(contains, str | None): 58 raise TypeError('Expected a string or None for contains') 59 60 allobjs = _get_all_objects(expanded=expanded) 61 62 if isinstance(cls, str): 63 objs = [o for o in allobjs if cls in str(type(o))] 64 else: 65 objs = [o for o in allobjs if isinstance(o, cls)] 66 if contains is not None: 67 objs = [o for o in objs if contains in str(o)] 68 69 return objs
Return all garbage-collected objects matching criteria.
'cls' can be an actual type or a string, in which case objects whose type descriptions (str(type(obj))) contain that string will be returned.
If 'contains' is provided, objects will be filtered to those containing that in their str() representations.
def getobj(objid: int, expanded: bool = False) -> Any:
    """Look up a live object from its id() value.

    This scans every object returned by _get_all_objects(), so it is
    VERY slow; use it for debugging only.

    Raises TypeError for a non-int 'objid' and RuntimeError when no
    object with that id is found.
    """
    if not isinstance(objid, int):
        raise TypeError(f'Expected an int for objid; got a {type(objid)}.')

    # Flush pending garbage first so we don't hand back dying objects.
    gc.collect()

    for candidate in _get_all_objects(expanded=expanded):
        if id(candidate) != objid:
            continue
        return candidate

    raise RuntimeError(f'Object with id {objid} not found.')
Return a garbage-collected object by its id.
Remember that this is VERY inefficient and should only ever be used for debugging.
def getrefs(obj: Any) -> list[Any]:
    """Return the objects that gc reports as referring to 'obj'.

    The dict returned by vars() for this call is left out of the result.
    """
    # Grab our own locals dict so it can be filtered out below.
    own_locals = vars()
    return [
        referrer
        for referrer in gc.get_referrers(obj)
        if referrer is not own_locals
    ]
Given an object, return things referencing it.
def _fd_map(objs: list[Any]) -> dict[int, Any]:
    """Map file descriptors to the given file objects that report them."""
    fdmap: dict[int, Any] = {}
    for obj in objs:
        try:
            fdmap[obj.fileno()] = obj
        except (OSError, ValueError):
            # fileno() raises for closed files and for wrappers around
            # streams with no descriptor; those simply have no entry.
            continue
    return fdmap


def printfiles(file: TextIO | None = None) -> None:
    """Print info about open files in the current app.

    Output goes to 'file', or to sys.stderr if it is None. Requires the
    'psutil' module; if it can't be imported, an error line is printed
    instead.
    """
    import io

    file = sys.stderr if file is None else file
    try:
        import psutil
    except ImportError:
        print(
            "Error: printfiles requires the 'psutil' module to be installed.",
            file=file,
        )
        return

    proc = psutil.Process()

    # Let's grab all Python file handles so we can associate raw files
    # with their Python objects when possible.
    fileio_ids = _fd_map(getobjs(io.FileIO))
    textio_ids = _fd_map(getobjs(io.TextIOWrapper))

    # FIXME: we could do a more limited version of this when psutil is
    # not present that simply includes Python's files.
    print('Files open by this app (not limited to Python\'s):', file=file)
    for i, ofile in enumerate(proc.open_files()):
        # Mypy doesn't know about mode apparently.
        # (and can't use type: ignore because we don't require psutil
        # and then mypy complains about unused ignore comment when its
        # not present)
        mode = getattr(ofile, 'mode')
        assert isinstance(mode, str)
        textio = textio_ids.get(ofile.fd)
        textio_s = id(textio) if textio is not None else '<not found>'
        fileio = fileio_ids.get(ofile.fd)
        fileio_s = id(fileio) if fileio is not None else '<not found>'
        # Rows must go to the same stream as the header above.
        print(
            f'#{i+1}: path={ofile.path!r},'
            f' fd={ofile.fd}, mode={mode!r}, TextIOWrapper={textio_s},'
            f' FileIO={fileio_s}',
            file=file,
        )
Print info about open files in the current app.
def printrefs(
    obj: Any,
    max_level: int = 2,
    exclude_objs: list[Any] | None = None,
    expand_ids: list[int] | None = None,
    file: TextIO | None = None,
) -> None:
    """Print a human readable tree of the objects referring to 'obj'.

    'max_level' is the number of levels of recursion printed.
    'exclude_objs' optionally lists exact objects to leave out when they
    turn up as referrers (handy for hiding the caller's own locals(),
    etc).
    'expand_ids' optionally lists object ids that are always expanded,
    even once max_level has been reached.
    'file' is the output stream; sys.stderr is used when it is None.
    """
    # Fill in defaults for everything the caller left as None.
    skipped = exclude_objs if exclude_objs is not None else []
    forced = expand_ids if expand_ids is not None else []
    out = file if file is not None else sys.stderr

    _printrefs(
        obj,
        level=0,
        max_level=max_level,
        exclude_objs=skipped,
        expand_ids=forced,
        file=out,
    )
Print human readable list of objects referring to an object.
'max_level' specifies how many levels of recursion are printed. 'exclude_objs' can be a list of exact objects to skip if found in the referrers list. This can be useful to avoid printing the local context where the object was passed in from (locals(), etc). 'expand_ids' can be a list of object ids; if that particular object is found, it will always be expanded even if max_level has been reached.
def printtypes(
    limit: int = 50, file: TextIO | None = None, expanded: bool = False
) -> None:
    """Print the types with the most live instances, most common first.

    At most 'limit' types are listed. 'file' is handed straight to
    print(). 'expanded' is passed to _get_all_objects().
    """
    assert limit > 0
    counts: dict[str, int] = {}
    gc.collect()  # Recommended before get_objects().
    everything = _get_all_objects(expanded=expanded)
    total = len(everything)
    for obj in everything:
        modname = type(obj).__module__
        label = type(obj).__qualname__
        # Builtin types are listed by bare name; others get a module prefix.
        if modname != 'builtins':
            label = f'{modname}.{label}'
        counts[label] = counts.get(label, 0) + 1

    # The object list presumably holds frames/dicts from this very call
    # which in turn refer back to it; empty it now so these huge lists
    # don't pile up until the cyclical collector (hopefully) runs.
    everything.clear()
    del everything

    print(f'Types most allocated ({total} total objects):', file=file)
    ranked = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    for rank, (label, count) in enumerate(ranked[:limit], start=1):
        percent = count / total * 100.0
        print(f'{rank}: {label}: {count} ({percent:.2f}%)', file=file)
Print a human readable list of which types have the most instances.
def printsizes(
    limit: int = 50, file: TextIO | None = None, expanded: bool = False
) -> None:
    """Print total allocated sizes of different types.

    Sizes are summed per type using sys.getsizeof(), which is shallow.
    At most 'limit' types are listed. 'file' is passed straight through
    to print(). 'expanded' is passed to _get_all_objects().
    """
    assert limit > 0
    objsizes: dict[str, int] = {}
    gc.collect()  # Recommended before get_objects().
    allobjs = _get_all_objects(expanded=expanded)
    totalobjsize = 0

    for obj in allobjs:
        modname = type(obj).__module__
        tpname = type(obj).__qualname__
        if modname != 'builtins':
            tpname = f'{modname}.{tpname}'
        objsize = sys.getsizeof(obj)
        objsizes[tpname] = objsizes.get(tpname, 0) + objsize
        totalobjsize += objsize

    # The object list presumably holds frames/dicts from this very call
    # which in turn refer back to it; empty it now so these huge lists
    # don't pile up until the cyclical collector (hopefully) runs.
    allobjs.clear()
    del allobjs

    totalobjmb = totalobjsize / (1024 * 1024)
    print(
        f'Types with most allocated bytes ({totalobjmb:.2f} mb total):',
        file=file,
    )
    for i, tpitem in enumerate(
        sorted(objsizes.items(), key=lambda x: x[1], reverse=True)[:limit]
    ):
        tpname, tpval = tpitem
        # Guard the division in case nothing at all was measured.
        percent = tpval / totalobjsize * 100.0 if totalobjsize else 0.0
        print(f'{i+1}: {tpname}: {tpval} ({percent:.2f}%)', file=file)
Print total allocated sizes of different types.