# pprint.py
#  Author:      Fred L. Drake, Jr.
#               fdrake@acm.org
#
#  This is a simple little module I wrote to make life easier.  I didn't
#  see anything quite like it in the library, though I may have overlooked
#  something.  I wrote this when I was trying to read some heavily nested
#  tuples with fairly non-descriptive content.  This is modeled very much
#  after Lisp/Scheme-style pretty-printing of lists.  If you find it
#  useful, thank small children who sleep at night.
"""Support to pretty-print lists, tuples, & dictionaries recursively.

Very simple, but useful, especially in debugging data structures.

Classes
-------

PrettyPrinter()
    Handle pretty-printing operations onto a stream using a configured
    set of formatting parameters.

Functions
---------

pformat()
    Format a Python object into a pretty-printed representation.

pprint()
    Pretty-print a Python object to a stream [default is sys.stdout].

saferepr()
    Generate a 'standard' repr()-like value, but protect against recursive
    data structures.

"""
  27. import collections as _collections
  28. import re
  29. import sys as _sys
  30. import types as _types
  31. from io import StringIO as _StringIO
  32. __all__ = ["pprint","pformat","isreadable","isrecursive","saferepr",
  33. "PrettyPrinter", "pp"]
  34. def pprint(object, stream=None, indent=1, width=80, depth=None, *,
  35. compact=False, sort_dicts=True):
  36. """Pretty-print a Python object to a stream [default is sys.stdout]."""
  37. printer = PrettyPrinter(
  38. stream=stream, indent=indent, width=width, depth=depth,
  39. compact=compact, sort_dicts=sort_dicts)
  40. printer.pprint(object)
  41. def pformat(object, indent=1, width=80, depth=None, *,
  42. compact=False, sort_dicts=True):
  43. """Format a Python object into a pretty-printed representation."""
  44. return PrettyPrinter(indent=indent, width=width, depth=depth,
  45. compact=compact, sort_dicts=sort_dicts).pformat(object)
  46. def pp(object, *args, sort_dicts=False, **kwargs):
  47. """Pretty-print a Python object"""
  48. pprint(object, *args, sort_dicts=sort_dicts, **kwargs)
  49. def saferepr(object):
  50. """Version of repr() which can handle recursive data structures."""
  51. return _safe_repr(object, {}, None, 0, True)[0]
  52. def isreadable(object):
  53. """Determine if saferepr(object) is readable by eval()."""
  54. return _safe_repr(object, {}, None, 0, True)[1]
  55. def isrecursive(object):
  56. """Determine if object requires a recursive representation."""
  57. return _safe_repr(object, {}, None, 0, True)[2]
  58. class _safe_key:
  59. """Helper function for key functions when sorting unorderable objects.
  60. The wrapped-object will fallback to a Py2.x style comparison for
  61. unorderable types (sorting first comparing the type name and then by
  62. the obj ids). Does not work recursively, so dict.items() must have
  63. _safe_key applied to both the key and the value.
  64. """
  65. __slots__ = ['obj']
  66. def __init__(self, obj):
  67. self.obj = obj
  68. def __lt__(self, other):
  69. try:
  70. return self.obj < other.obj
  71. except TypeError:
  72. return ((str(type(self.obj)), id(self.obj)) < \
  73. (str(type(other.obj)), id(other.obj)))
  74. def _safe_tuple(t):
  75. "Helper function for comparing 2-tuples"
  76. return _safe_key(t[0]), _safe_key(t[1])
  77. class PrettyPrinter:
  78. def __init__(self, indent=1, width=80, depth=None, stream=None, *,
  79. compact=False, sort_dicts=True):
  80. """Handle pretty printing operations onto a stream using a set of
  81. configured parameters.
  82. indent
  83. Number of spaces to indent for each level of nesting.
  84. width
  85. Attempted maximum number of columns in the output.
  86. depth
  87. The maximum depth to print out nested structures.
  88. stream
  89. The desired output stream. If omitted (or false), the standard
  90. output stream available at construction will be used.
  91. compact
  92. If true, several items will be combined in one line.
  93. sort_dicts
  94. If true, dict keys are sorted.
  95. """
  96. indent = int(indent)
  97. width = int(width)
  98. if indent < 0:
  99. raise ValueError('indent must be >= 0')
  100. if depth is not None and depth <= 0:
  101. raise ValueError('depth must be > 0')
  102. if not width:
  103. raise ValueError('width must be != 0')
  104. self._depth = depth
  105. self._indent_per_level = indent
  106. self._width = width
  107. if stream is not None:
  108. self._stream = stream
  109. else:
  110. self._stream = _sys.stdout
  111. self._compact = bool(compact)
  112. self._sort_dicts = sort_dicts
  113. def pprint(self, object):
  114. self._format(object, self._stream, 0, 0, {}, 0)
  115. self._stream.write("\n")
  116. def pformat(self, object):
  117. sio = _StringIO()
  118. self._format(object, sio, 0, 0, {}, 0)
  119. return sio.getvalue()
  120. def isrecursive(self, object):
  121. return self.format(object, {}, 0, 0)[2]
  122. def isreadable(self, object):
  123. s, readable, recursive = self.format(object, {}, 0, 0)
  124. return readable and not recursive
  125. def _format(self, object, stream, indent, allowance, context, level):
  126. objid = id(object)
  127. if objid in context:
  128. stream.write(_recursion(object))
  129. self._recursive = True
  130. self._readable = False
  131. return
  132. rep = self._repr(object, context, level)
  133. max_width = self._width - indent - allowance
  134. if len(rep) > max_width:
  135. p = self._dispatch.get(type(object).__repr__, None)
  136. if p is not None:
  137. context[objid] = 1
  138. p(self, object, stream, indent, allowance, context, level + 1)
  139. del context[objid]
  140. return
  141. elif isinstance(object, dict):
  142. context[objid] = 1
  143. self._pprint_dict(object, stream, indent, allowance,
  144. context, level + 1)
  145. del context[objid]
  146. return
  147. stream.write(rep)
  148. _dispatch = {}
  149. def _pprint_dict(self, object, stream, indent, allowance, context, level):
  150. write = stream.write
  151. write('{')
  152. if self._indent_per_level > 1:
  153. write((self._indent_per_level - 1) * ' ')
  154. length = len(object)
  155. if length:
  156. if self._sort_dicts:
  157. items = sorted(object.items(), key=_safe_tuple)
  158. else:
  159. items = object.items()
  160. self._format_dict_items(items, stream, indent, allowance + 1,
  161. context, level)
  162. write('}')
  163. _dispatch[dict.__repr__] = _pprint_dict
  164. def _pprint_ordered_dict(self, object, stream, indent, allowance, context, level):
  165. if not len(object):
  166. stream.write(repr(object))
  167. return
  168. cls = object.__class__
  169. stream.write(cls.__name__ + '(')
  170. self._format(list(object.items()), stream,
  171. indent + len(cls.__name__) + 1, allowance + 1,
  172. context, level)
  173. stream.write(')')
  174. _dispatch[_collections.OrderedDict.__repr__] = _pprint_ordered_dict
  175. def _pprint_list(self, object, stream, indent, allowance, context, level):
  176. stream.write('[')
  177. self._format_items(object, stream, indent, allowance + 1,
  178. context, level)
  179. stream.write(']')
  180. _dispatch[list.__repr__] = _pprint_list
  181. def _pprint_tuple(self, object, stream, indent, allowance, context, level):
  182. stream.write('(')
  183. endchar = ',)' if len(object) == 1 else ')'
  184. self._format_items(object, stream, indent, allowance + len(endchar),
  185. context, level)
  186. stream.write(endchar)
  187. _dispatch[tuple.__repr__] = _pprint_tuple
  188. def _pprint_set(self, object, stream, indent, allowance, context, level):
  189. if not len(object):
  190. stream.write(repr(object))
  191. return
  192. typ = object.__class__
  193. if typ is set:
  194. stream.write('{')
  195. endchar = '}'
  196. else:
  197. stream.write(typ.__name__ + '({')
  198. endchar = '})'
  199. indent += len(typ.__name__) + 1
  200. object = sorted(object, key=_safe_key)
  201. self._format_items(object, stream, indent, allowance + len(endchar),
  202. context, level)
  203. stream.write(endchar)
  204. _dispatch[set.__repr__] = _pprint_set
  205. _dispatch[frozenset.__repr__] = _pprint_set
  206. def _pprint_str(self, object, stream, indent, allowance, context, level):
  207. write = stream.write
  208. if not len(object):
  209. write(repr(object))
  210. return
  211. chunks = []
  212. lines = object.splitlines(True)
  213. if level == 1:
  214. indent += 1
  215. allowance += 1
  216. max_width1 = max_width = self._width - indent
  217. for i, line in enumerate(lines):
  218. rep = repr(line)
  219. if i == len(lines) - 1:
  220. max_width1 -= allowance
  221. if len(rep) <= max_width1:
  222. chunks.append(rep)
  223. else:
  224. # A list of alternating (non-space, space) strings
  225. parts = re.findall(r'\S*\s*', line)
  226. assert parts
  227. assert not parts[-1]
  228. parts.pop() # drop empty last part
  229. max_width2 = max_width
  230. current = ''
  231. for j, part in enumerate(parts):
  232. candidate = current + part
  233. if j == len(parts) - 1 and i == len(lines) - 1:
  234. max_width2 -= allowance
  235. if len(repr(candidate)) > max_width2:
  236. if current:
  237. chunks.append(repr(current))
  238. current = part
  239. else:
  240. current = candidate
  241. if current:
  242. chunks.append(repr(current))
  243. if len(chunks) == 1:
  244. write(rep)
  245. return
  246. if level == 1:
  247. write('(')
  248. for i, rep in enumerate(chunks):
  249. if i > 0:
  250. write('\n' + ' '*indent)
  251. write(rep)
  252. if level == 1:
  253. write(')')
  254. _dispatch[str.__repr__] = _pprint_str
  255. def _pprint_bytes(self, object, stream, indent, allowance, context, level):
  256. write = stream.write
  257. if len(object) <= 4:
  258. write(repr(object))
  259. return
  260. parens = level == 1
  261. if parens:
  262. indent += 1
  263. allowance += 1
  264. write('(')
  265. delim = ''
  266. for rep in _wrap_bytes_repr(object, self._width - indent, allowance):
  267. write(delim)
  268. write(rep)
  269. if not delim:
  270. delim = '\n' + ' '*indent
  271. if parens:
  272. write(')')
  273. _dispatch[bytes.__repr__] = _pprint_bytes
  274. def _pprint_bytearray(self, object, stream, indent, allowance, context, level):
  275. write = stream.write
  276. write('bytearray(')
  277. self._pprint_bytes(bytes(object), stream, indent + 10,
  278. allowance + 1, context, level + 1)
  279. write(')')
  280. _dispatch[bytearray.__repr__] = _pprint_bytearray
  281. def _pprint_mappingproxy(self, object, stream, indent, allowance, context, level):
  282. stream.write('mappingproxy(')
  283. self._format(object.copy(), stream, indent + 13, allowance + 1,
  284. context, level)
  285. stream.write(')')
  286. _dispatch[_types.MappingProxyType.__repr__] = _pprint_mappingproxy
  287. def _format_dict_items(self, items, stream, indent, allowance, context,
  288. level):
  289. write = stream.write
  290. indent += self._indent_per_level
  291. delimnl = ',\n' + ' ' * indent
  292. last_index = len(items) - 1
  293. for i, (key, ent) in enumerate(items):
  294. last = i == last_index
  295. rep = self._repr(key, context, level)
  296. write(rep)
  297. write(': ')
  298. self._format(ent, stream, indent + len(rep) + 2,
  299. allowance if last else 1,
  300. context, level)
  301. if not last:
  302. write(delimnl)
  303. def _format_items(self, items, stream, indent, allowance, context, level):
  304. write = stream.write
  305. indent += self._indent_per_level
  306. if self._indent_per_level > 1:
  307. write((self._indent_per_level - 1) * ' ')
  308. delimnl = ',\n' + ' ' * indent
  309. delim = ''
  310. width = max_width = self._width - indent + 1
  311. it = iter(items)
  312. try:
  313. next_ent = next(it)
  314. except StopIteration:
  315. return
  316. last = False
  317. while not last:
  318. ent = next_ent
  319. try:
  320. next_ent = next(it)
  321. except StopIteration:
  322. last = True
  323. max_width -= allowance
  324. width -= allowance
  325. if self._compact:
  326. rep = self._repr(ent, context, level)
  327. w = len(rep) + 2
  328. if width < w:
  329. width = max_width
  330. if delim:
  331. delim = delimnl
  332. if width >= w:
  333. width -= w
  334. write(delim)
  335. delim = ', '
  336. write(rep)
  337. continue
  338. write(delim)
  339. delim = delimnl
  340. self._format(ent, stream, indent,
  341. allowance if last else 1,
  342. context, level)
  343. def _repr(self, object, context, level):
  344. repr, readable, recursive = self.format(object, context.copy(),
  345. self._depth, level)
  346. if not readable:
  347. self._readable = False
  348. if recursive:
  349. self._recursive = True
  350. return repr
  351. def format(self, object, context, maxlevels, level):
  352. """Format object for a specific context, returning a string
  353. and flags indicating whether the representation is 'readable'
  354. and whether the object represents a recursive construct.
  355. """
  356. return _safe_repr(object, context, maxlevels, level, self._sort_dicts)
  357. def _pprint_default_dict(self, object, stream, indent, allowance, context, level):
  358. if not len(object):
  359. stream.write(repr(object))
  360. return
  361. rdf = self._repr(object.default_factory, context, level)
  362. cls = object.__class__
  363. indent += len(cls.__name__) + 1
  364. stream.write('%s(%s,\n%s' % (cls.__name__, rdf, ' ' * indent))
  365. self._pprint_dict(object, stream, indent, allowance + 1, context, level)
  366. stream.write(')')
  367. _dispatch[_collections.defaultdict.__repr__] = _pprint_default_dict
  368. def _pprint_counter(self, object, stream, indent, allowance, context, level):
  369. if not len(object):
  370. stream.write(repr(object))
  371. return
  372. cls = object.__class__
  373. stream.write(cls.__name__ + '({')
  374. if self._indent_per_level > 1:
  375. stream.write((self._indent_per_level - 1) * ' ')
  376. items = object.most_common()
  377. self._format_dict_items(items, stream,
  378. indent + len(cls.__name__) + 1, allowance + 2,
  379. context, level)
  380. stream.write('})')
  381. _dispatch[_collections.Counter.__repr__] = _pprint_counter
  382. def _pprint_chain_map(self, object, stream, indent, allowance, context, level):
  383. if not len(object.maps):
  384. stream.write(repr(object))
  385. return
  386. cls = object.__class__
  387. stream.write(cls.__name__ + '(')
  388. indent += len(cls.__name__) + 1
  389. for i, m in enumerate(object.maps):
  390. if i == len(object.maps) - 1:
  391. self._format(m, stream, indent, allowance + 1, context, level)
  392. stream.write(')')
  393. else:
  394. self._format(m, stream, indent, 1, context, level)
  395. stream.write(',\n' + ' ' * indent)
  396. _dispatch[_collections.ChainMap.__repr__] = _pprint_chain_map
  397. def _pprint_deque(self, object, stream, indent, allowance, context, level):
  398. if not len(object):
  399. stream.write(repr(object))
  400. return
  401. cls = object.__class__
  402. stream.write(cls.__name__ + '(')
  403. indent += len(cls.__name__) + 1
  404. stream.write('[')
  405. if object.maxlen is None:
  406. self._format_items(object, stream, indent, allowance + 2,
  407. context, level)
  408. stream.write('])')
  409. else:
  410. self._format_items(object, stream, indent, 2,
  411. context, level)
  412. rml = self._repr(object.maxlen, context, level)
  413. stream.write('],\n%smaxlen=%s)' % (' ' * indent, rml))
  414. _dispatch[_collections.deque.__repr__] = _pprint_deque
  415. def _pprint_user_dict(self, object, stream, indent, allowance, context, level):
  416. self._format(object.data, stream, indent, allowance, context, level - 1)
  417. _dispatch[_collections.UserDict.__repr__] = _pprint_user_dict
  418. def _pprint_user_list(self, object, stream, indent, allowance, context, level):
  419. self._format(object.data, stream, indent, allowance, context, level - 1)
  420. _dispatch[_collections.UserList.__repr__] = _pprint_user_list
  421. def _pprint_user_string(self, object, stream, indent, allowance, context, level):
  422. self._format(object.data, stream, indent, allowance, context, level - 1)
  423. _dispatch[_collections.UserString.__repr__] = _pprint_user_string
  424. # Return triple (repr_string, isreadable, isrecursive).
  425. def _safe_repr(object, context, maxlevels, level, sort_dicts):
  426. typ = type(object)
  427. if typ in _builtin_scalars:
  428. return repr(object), True, False
  429. r = getattr(typ, "__repr__", None)
  430. if issubclass(typ, dict) and r is dict.__repr__:
  431. if not object:
  432. return "{}", True, False
  433. objid = id(object)
  434. if maxlevels and level >= maxlevels:
  435. return "{...}", False, objid in context
  436. if objid in context:
  437. return _recursion(object), False, True
  438. context[objid] = 1
  439. readable = True
  440. recursive = False
  441. components = []
  442. append = components.append
  443. level += 1
  444. if sort_dicts:
  445. items = sorted(object.items(), key=_safe_tuple)
  446. else:
  447. items = object.items()
  448. for k, v in items:
  449. krepr, kreadable, krecur = _safe_repr(k, context, maxlevels, level, sort_dicts)
  450. vrepr, vreadable, vrecur = _safe_repr(v, context, maxlevels, level, sort_dicts)
  451. append("%s: %s" % (krepr, vrepr))
  452. readable = readable and kreadable and vreadable
  453. if krecur or vrecur:
  454. recursive = True
  455. del context[objid]
  456. return "{%s}" % ", ".join(components), readable, recursive
  457. if (issubclass(typ, list) and r is list.__repr__) or \
  458. (issubclass(typ, tuple) and r is tuple.__repr__):
  459. if issubclass(typ, list):
  460. if not object:
  461. return "[]", True, False
  462. format = "[%s]"
  463. elif len(object) == 1:
  464. format = "(%s,)"
  465. else:
  466. if not object:
  467. return "()", True, False
  468. format = "(%s)"
  469. objid = id(object)
  470. if maxlevels and level >= maxlevels:
  471. return format % "...", False, objid in context
  472. if objid in context:
  473. return _recursion(object), False, True
  474. context[objid] = 1
  475. readable = True
  476. recursive = False
  477. components = []
  478. append = components.append
  479. level += 1
  480. for o in object:
  481. orepr, oreadable, orecur = _safe_repr(o, context, maxlevels, level, sort_dicts)
  482. append(orepr)
  483. if not oreadable:
  484. readable = False
  485. if orecur:
  486. recursive = True
  487. del context[objid]
  488. return format % ", ".join(components), readable, recursive
  489. rep = repr(object)
  490. return rep, (rep and not rep.startswith('<')), False
  491. _builtin_scalars = frozenset({str, bytes, bytearray, int, float, complex,
  492. bool, type(None)})
  493. def _recursion(object):
  494. return ("<Recursion on %s with id=%s>"
  495. % (type(object).__name__, id(object)))
  496. def _perfcheck(object=None):
  497. import time
  498. if object is None:
  499. object = [("string", (1, 2), [3, 4], {5: 6, 7: 8})] * 100000
  500. p = PrettyPrinter()
  501. t1 = time.perf_counter()
  502. _safe_repr(object, {}, None, 0, True)
  503. t2 = time.perf_counter()
  504. p.pformat(object)
  505. t3 = time.perf_counter()
  506. print("_safe_repr:", t2 - t1)
  507. print("pformat:", t3 - t2)
  508. def _wrap_bytes_repr(object, width, allowance):
  509. current = b''
  510. last = len(object) // 4 * 4
  511. for i in range(0, len(object), 4):
  512. part = object[i: i+4]
  513. candidate = current + part
  514. if i == last:
  515. width -= allowance
  516. if len(repr(candidate)) > width:
  517. if current:
  518. yield repr(current)
  519. current = part
  520. else:
  521. current = candidate
  522. if current:
  523. yield repr(current)
  524. if __name__ == "__main__":
  525. _perfcheck()