ast.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. """
  2. ast
  3. ~~~
  4. The `ast` module helps Python applications to process trees of the Python
  5. abstract syntax grammar. The abstract syntax itself might change with
  6. each Python release; this module helps to find out programmatically what
  7. the current grammar looks like and allows modifications of it.
  8. An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as
  9. a flag to the `compile()` builtin function or by using the `parse()`
  10. function from this module. The result will be a tree of objects whose
  11. classes all inherit from `ast.AST`.
  12. A modified abstract syntax tree can be compiled into a Python code object
  13. using the built-in `compile()` function.
  14. Additionally various helper functions are provided that make working with
  15. the trees simpler. The main intention of the helper functions and this
  16. module in general is to provide an easy to use interface for libraries
  17. that work tightly with the Python syntax (template engines for example).
  18. :copyright: Copyright 2008 by Armin Ronacher.
  19. :license: Python License.
  20. """
  21. from _ast import *
  22. def parse(source, filename='<unknown>', mode='exec', *,
  23. type_comments=False, feature_version=None):
  24. """
  25. Parse the source into an AST node.
  26. Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
  27. Pass type_comments=True to get back type comments where the syntax allows.
  28. """
  29. flags = PyCF_ONLY_AST
  30. if type_comments:
  31. flags |= PyCF_TYPE_COMMENTS
  32. if isinstance(feature_version, tuple):
  33. major, minor = feature_version # Should be a 2-tuple.
  34. assert major == 3
  35. feature_version = minor
  36. elif feature_version is None:
  37. feature_version = -1
  38. # Else it should be an int giving the minor version for 3.x.
  39. return compile(source, filename, mode, flags,
  40. _feature_version=feature_version)
  41. def literal_eval(node_or_string):
  42. """
  43. Safely evaluate an expression node or a string containing a Python
  44. expression. The string or node provided may only consist of the following
  45. Python literal structures: strings, bytes, numbers, tuples, lists, dicts,
  46. sets, booleans, and None.
  47. """
  48. if isinstance(node_or_string, str):
  49. node_or_string = parse(node_or_string, mode='eval')
  50. if isinstance(node_or_string, Expression):
  51. node_or_string = node_or_string.body
  52. def _raise_malformed_node(node):
  53. raise ValueError(f'malformed node or string: {node!r}')
  54. def _convert_num(node):
  55. if not isinstance(node, Constant) or type(node.value) not in (int, float, complex):
  56. _raise_malformed_node(node)
  57. return node.value
  58. def _convert_signed_num(node):
  59. if isinstance(node, UnaryOp) and isinstance(node.op, (UAdd, USub)):
  60. operand = _convert_num(node.operand)
  61. if isinstance(node.op, UAdd):
  62. return + operand
  63. else:
  64. return - operand
  65. return _convert_num(node)
  66. def _convert(node):
  67. if isinstance(node, Constant):
  68. return node.value
  69. elif isinstance(node, Tuple):
  70. return tuple(map(_convert, node.elts))
  71. elif isinstance(node, List):
  72. return list(map(_convert, node.elts))
  73. elif isinstance(node, Set):
  74. return set(map(_convert, node.elts))
  75. elif isinstance(node, Dict):
  76. if len(node.keys) != len(node.values):
  77. _raise_malformed_node(node)
  78. return dict(zip(map(_convert, node.keys),
  79. map(_convert, node.values)))
  80. elif isinstance(node, BinOp) and isinstance(node.op, (Add, Sub)):
  81. left = _convert_signed_num(node.left)
  82. right = _convert_num(node.right)
  83. if isinstance(left, (int, float)) and isinstance(right, complex):
  84. if isinstance(node.op, Add):
  85. return left + right
  86. else:
  87. return left - right
  88. return _convert_signed_num(node)
  89. return _convert(node_or_string)
  90. def dump(node, annotate_fields=True, include_attributes=False):
  91. """
  92. Return a formatted dump of the tree in node. This is mainly useful for
  93. debugging purposes. If annotate_fields is true (by default),
  94. the returned string will show the names and the values for fields.
  95. If annotate_fields is false, the result string will be more compact by
  96. omitting unambiguous field names. Attributes such as line
  97. numbers and column offsets are not dumped by default. If this is wanted,
  98. include_attributes can be set to true.
  99. """
  100. def _format(node):
  101. if isinstance(node, AST):
  102. args = []
  103. keywords = annotate_fields
  104. for field in node._fields:
  105. try:
  106. value = getattr(node, field)
  107. except AttributeError:
  108. keywords = True
  109. else:
  110. if keywords:
  111. args.append('%s=%s' % (field, _format(value)))
  112. else:
  113. args.append(_format(value))
  114. if include_attributes and node._attributes:
  115. for a in node._attributes:
  116. try:
  117. args.append('%s=%s' % (a, _format(getattr(node, a))))
  118. except AttributeError:
  119. pass
  120. return '%s(%s)' % (node.__class__.__name__, ', '.join(args))
  121. elif isinstance(node, list):
  122. return '[%s]' % ', '.join(_format(x) for x in node)
  123. return repr(node)
  124. if not isinstance(node, AST):
  125. raise TypeError('expected AST, got %r' % node.__class__.__name__)
  126. return _format(node)
  127. def copy_location(new_node, old_node):
  128. """
  129. Copy source location (`lineno`, `col_offset`, `end_lineno`, and `end_col_offset`
  130. attributes) from *old_node* to *new_node* if possible, and return *new_node*.
  131. """
  132. for attr in 'lineno', 'col_offset', 'end_lineno', 'end_col_offset':
  133. if attr in old_node._attributes and attr in new_node._attributes:
  134. value = getattr(old_node, attr, None)
  135. # end_lineno and end_col_offset are optional attributes, and they
  136. # should be copied whether the value is None or not.
  137. if value is not None or (
  138. hasattr(old_node, attr) and attr.startswith("end_")
  139. ):
  140. setattr(new_node, attr, value)
  141. return new_node
  142. def fix_missing_locations(node):
  143. """
  144. When you compile a node tree with compile(), the compiler expects lineno and
  145. col_offset attributes for every node that supports them. This is rather
  146. tedious to fill in for generated nodes, so this helper adds these attributes
  147. recursively where not already set, by setting them to the values of the
  148. parent node. It works recursively starting at *node*.
  149. """
  150. def _fix(node, lineno, col_offset, end_lineno, end_col_offset):
  151. if 'lineno' in node._attributes:
  152. if not hasattr(node, 'lineno'):
  153. node.lineno = lineno
  154. else:
  155. lineno = node.lineno
  156. if 'end_lineno' in node._attributes:
  157. if not hasattr(node, 'end_lineno'):
  158. node.end_lineno = end_lineno
  159. else:
  160. end_lineno = node.end_lineno
  161. if 'col_offset' in node._attributes:
  162. if not hasattr(node, 'col_offset'):
  163. node.col_offset = col_offset
  164. else:
  165. col_offset = node.col_offset
  166. if 'end_col_offset' in node._attributes:
  167. if not hasattr(node, 'end_col_offset'):
  168. node.end_col_offset = end_col_offset
  169. else:
  170. end_col_offset = node.end_col_offset
  171. for child in iter_child_nodes(node):
  172. _fix(child, lineno, col_offset, end_lineno, end_col_offset)
  173. _fix(node, 1, 0, 1, 0)
  174. return node
  175. def increment_lineno(node, n=1):
  176. """
  177. Increment the line number and end line number of each node in the tree
  178. starting at *node* by *n*. This is useful to "move code" to a different
  179. location in a file.
  180. """
  181. for child in walk(node):
  182. if 'lineno' in child._attributes:
  183. child.lineno = getattr(child, 'lineno', 0) + n
  184. if (
  185. "end_lineno" in child._attributes
  186. and (end_lineno := getattr(child, "end_lineno", 0)) is not None
  187. ):
  188. child.end_lineno = end_lineno + n
  189. return node
  190. def iter_fields(node):
  191. """
  192. Yield a tuple of ``(fieldname, value)`` for each field in ``node._fields``
  193. that is present on *node*.
  194. """
  195. for field in node._fields:
  196. try:
  197. yield field, getattr(node, field)
  198. except AttributeError:
  199. pass
  200. def iter_child_nodes(node):
  201. """
  202. Yield all direct child nodes of *node*, that is, all fields that are nodes
  203. and all items of fields that are lists of nodes.
  204. """
  205. for name, field in iter_fields(node):
  206. if isinstance(field, AST):
  207. yield field
  208. elif isinstance(field, list):
  209. for item in field:
  210. if isinstance(item, AST):
  211. yield item
  212. def get_docstring(node, clean=True):
  213. """
  214. Return the docstring for the given node or None if no docstring can
  215. be found. If the node provided does not have docstrings a TypeError
  216. will be raised.
  217. If *clean* is `True`, all tabs are expanded to spaces and any whitespace
  218. that can be uniformly removed from the second line onwards is removed.
  219. """
  220. if not isinstance(node, (AsyncFunctionDef, FunctionDef, ClassDef, Module)):
  221. raise TypeError("%r can't have docstrings" % node.__class__.__name__)
  222. if not(node.body and isinstance(node.body[0], Expr)):
  223. return None
  224. node = node.body[0].value
  225. if isinstance(node, Str):
  226. text = node.s
  227. elif isinstance(node, Constant) and isinstance(node.value, str):
  228. text = node.value
  229. else:
  230. return None
  231. if clean:
  232. import inspect
  233. text = inspect.cleandoc(text)
  234. return text
  235. def _splitlines_no_ff(source):
  236. """Split a string into lines ignoring form feed and other chars.
  237. This mimics how the Python parser splits source code.
  238. """
  239. idx = 0
  240. lines = []
  241. next_line = ''
  242. while idx < len(source):
  243. c = source[idx]
  244. next_line += c
  245. idx += 1
  246. # Keep \r\n together
  247. if c == '\r' and idx < len(source) and source[idx] == '\n':
  248. next_line += '\n'
  249. idx += 1
  250. if c in '\r\n':
  251. lines.append(next_line)
  252. next_line = ''
  253. if next_line:
  254. lines.append(next_line)
  255. return lines
  256. def _pad_whitespace(source):
  257. r"""Replace all chars except '\f\t' in a line with spaces."""
  258. result = ''
  259. for c in source:
  260. if c in '\f\t':
  261. result += c
  262. else:
  263. result += ' '
  264. return result
  265. def get_source_segment(source, node, *, padded=False):
  266. """Get source code segment of the *source* that generated *node*.
  267. If some location information (`lineno`, `end_lineno`, `col_offset`,
  268. or `end_col_offset`) is missing, return None.
  269. If *padded* is `True`, the first line of a multi-line statement will
  270. be padded with spaces to match its original position.
  271. """
  272. try:
  273. lineno = node.lineno - 1
  274. end_lineno = node.end_lineno - 1
  275. col_offset = node.col_offset
  276. end_col_offset = node.end_col_offset
  277. except AttributeError:
  278. return None
  279. lines = _splitlines_no_ff(source)
  280. if end_lineno == lineno:
  281. return lines[lineno].encode()[col_offset:end_col_offset].decode()
  282. if padded:
  283. padding = _pad_whitespace(lines[lineno].encode()[:col_offset].decode())
  284. else:
  285. padding = ''
  286. first = padding + lines[lineno].encode()[col_offset:].decode()
  287. last = lines[end_lineno].encode()[:end_col_offset].decode()
  288. lines = lines[lineno+1:end_lineno]
  289. lines.insert(0, first)
  290. lines.append(last)
  291. return ''.join(lines)
  292. def walk(node):
  293. """
  294. Recursively yield all descendant nodes in the tree starting at *node*
  295. (including *node* itself), in no specified order. This is useful if you
  296. only want to modify nodes in place and don't care about the context.
  297. """
  298. from collections import deque
  299. todo = deque([node])
  300. while todo:
  301. node = todo.popleft()
  302. todo.extend(iter_child_nodes(node))
  303. yield node
  304. class NodeVisitor(object):
  305. """
  306. A node visitor base class that walks the abstract syntax tree and calls a
  307. visitor function for every node found. This function may return a value
  308. which is forwarded by the `visit` method.
  309. This class is meant to be subclassed, with the subclass adding visitor
  310. methods.
  311. Per default the visitor functions for the nodes are ``'visit_'`` +
  312. class name of the node. So a `TryFinally` node visit function would
  313. be `visit_TryFinally`. This behavior can be changed by overriding
  314. the `visit` method. If no visitor function exists for a node
  315. (return value `None`) the `generic_visit` visitor is used instead.
  316. Don't use the `NodeVisitor` if you want to apply changes to nodes during
  317. traversing. For this a special visitor exists (`NodeTransformer`) that
  318. allows modifications.
  319. """
  320. def visit(self, node):
  321. """Visit a node."""
  322. method = 'visit_' + node.__class__.__name__
  323. visitor = getattr(self, method, self.generic_visit)
  324. return visitor(node)
  325. def generic_visit(self, node):
  326. """Called if no explicit visitor function exists for a node."""
  327. for field, value in iter_fields(node):
  328. if isinstance(value, list):
  329. for item in value:
  330. if isinstance(item, AST):
  331. self.visit(item)
  332. elif isinstance(value, AST):
  333. self.visit(value)
  334. def visit_Constant(self, node):
  335. value = node.value
  336. type_name = _const_node_type_names.get(type(value))
  337. if type_name is None:
  338. for cls, name in _const_node_type_names.items():
  339. if isinstance(value, cls):
  340. type_name = name
  341. break
  342. if type_name is not None:
  343. method = 'visit_' + type_name
  344. try:
  345. visitor = getattr(self, method)
  346. except AttributeError:
  347. pass
  348. else:
  349. import warnings
  350. warnings.warn(f"{method} is deprecated; add visit_Constant",
  351. PendingDeprecationWarning, 2)
  352. return visitor(node)
  353. return self.generic_visit(node)
  354. class NodeTransformer(NodeVisitor):
  355. """
  356. A :class:`NodeVisitor` subclass that walks the abstract syntax tree and
  357. allows modification of nodes.
  358. The `NodeTransformer` will walk the AST and use the return value of the
  359. visitor methods to replace or remove the old node. If the return value of
  360. the visitor method is ``None``, the node will be removed from its location,
  361. otherwise it is replaced with the return value. The return value may be the
  362. original node in which case no replacement takes place.
  363. Here is an example transformer that rewrites all occurrences of name lookups
  364. (``foo``) to ``data['foo']``::
  365. class RewriteName(NodeTransformer):
  366. def visit_Name(self, node):
  367. return Subscript(
  368. value=Name(id='data', ctx=Load()),
  369. slice=Index(value=Str(s=node.id)),
  370. ctx=node.ctx
  371. )
  372. Keep in mind that if the node you're operating on has child nodes you must
  373. either transform the child nodes yourself or call the :meth:`generic_visit`
  374. method for the node first.
  375. For nodes that were part of a collection of statements (that applies to all
  376. statement nodes), the visitor may also return a list of nodes rather than
  377. just a single node.
  378. Usually you use the transformer like this::
  379. node = YourTransformer().visit(node)
  380. """
  381. def generic_visit(self, node):
  382. for field, old_value in iter_fields(node):
  383. if isinstance(old_value, list):
  384. new_values = []
  385. for value in old_value:
  386. if isinstance(value, AST):
  387. value = self.visit(value)
  388. if value is None:
  389. continue
  390. elif not isinstance(value, AST):
  391. new_values.extend(value)
  392. continue
  393. new_values.append(value)
  394. old_value[:] = new_values
  395. elif isinstance(old_value, AST):
  396. new_node = self.visit(old_value)
  397. if new_node is None:
  398. delattr(node, field)
  399. else:
  400. setattr(node, field, new_node)
  401. return node
  402. # The following code is for backward compatibility.
  403. # It will be removed in future.
  404. def _getter(self):
  405. return self.value
  406. def _setter(self, value):
  407. self.value = value
  408. Constant.n = property(_getter, _setter)
  409. Constant.s = property(_getter, _setter)
  410. class _ABC(type):
  411. def __instancecheck__(cls, inst):
  412. if not isinstance(inst, Constant):
  413. return False
  414. if cls in _const_types:
  415. try:
  416. value = inst.value
  417. except AttributeError:
  418. return False
  419. else:
  420. return (
  421. isinstance(value, _const_types[cls]) and
  422. not isinstance(value, _const_types_not.get(cls, ()))
  423. )
  424. return type.__instancecheck__(cls, inst)
  425. def _new(cls, *args, **kwargs):
  426. for key in kwargs:
  427. if key not in cls._fields:
  428. # arbitrary keyword arguments are accepted
  429. continue
  430. pos = cls._fields.index(key)
  431. if pos < len(args):
  432. raise TypeError(f"{cls.__name__} got multiple values for argument {key!r}")
  433. if cls in _const_types:
  434. return Constant(*args, **kwargs)
  435. return Constant.__new__(cls, *args, **kwargs)
# Legacy node class kept for backward compatibility.  Its single field is
# ``n``; construction goes through ``_new`` and isinstance() checks through
# the ``_ABC`` metaclass.
class Num(Constant, metaclass=_ABC):
    _fields = ('n',)
    __new__ = _new
# Legacy node class kept for backward compatibility.  Its single field is
# ``s``; construction goes through ``_new`` and isinstance() checks through
# the ``_ABC`` metaclass.
class Str(Constant, metaclass=_ABC):
    _fields = ('s',)
    __new__ = _new
# Legacy node class kept for backward compatibility.  Its single field is
# ``s``; construction goes through ``_new`` and isinstance() checks through
# the ``_ABC`` metaclass.
class Bytes(Constant, metaclass=_ABC):
    _fields = ('s',)
    __new__ = _new
# Legacy node class kept for backward compatibility.  It declares no
# ``_fields`` of its own, so it uses the ones inherited from Constant;
# construction goes through ``_new``.
class NameConstant(Constant, metaclass=_ABC):
    __new__ = _new
  447. class Ellipsis(Constant, metaclass=_ABC):
  448. _fields = ()
  449. def __new__(cls, *args, **kwargs):
  450. if cls is Ellipsis:
  451. return Constant(..., *args, **kwargs)
  452. return Constant.__new__(cls, *args, **kwargs)
# For each legacy node class, the value types a Constant must hold for
# ``isinstance(node, <class>)`` to succeed (used by _ABC.__instancecheck__).
# Membership in this table is also what makes ``_new`` build a plain Constant.
_const_types = {
    Num: (int, float, complex),
    Str: (str,),
    Bytes: (bytes,),
    NameConstant: (type(None), bool),
    Ellipsis: (type(...),),
}
# Value types that are rejected for a legacy class even though they pass the
# check above: bool is a subclass of int but must not count as a Num.
_const_types_not = {
    Num: (bool,),
}
# Maps a constant's value type to the legacy node name, which
# NodeVisitor.visit_Constant uses to look for an old-style ``visit_<name>``
# method.  Insertion order matters: the isinstance() fallback there walks the
# entries in order.
_const_node_type_names = {
    bool: 'NameConstant',  # should be before int
    type(None): 'NameConstant',
    int: 'Num',
    float: 'Num',
    complex: 'Num',
    str: 'Str',
    bytes: 'Bytes',
    type(...): 'Ellipsis',
}