img_utils.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. #!/usr/bin/env python
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing,
  13. # software distributed under the License is distributed on an
  14. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. # KIND, either express or implied. See the License for the
  16. # specific language governing permissions and limitations
  17. # under the License.
  18. """Utils for documentation's images."""
  19. import argparse
  20. import logging
  21. import re
  22. from pathlib import Path
  23. from typing import Set, Tuple
  24. log = logging.getLogger(__file__)
  25. log.addHandler(logging.StreamHandler())
  26. root_dir: Path = Path(__file__).parent
  27. img_dir: Path = root_dir.joinpath("img")
  28. doc_dir: Path = root_dir.joinpath("docs")
  29. dev_en_dir: Path = doc_dir.joinpath("en", "development")
  30. dev_zh_dir: Path = doc_dir.joinpath("zh", "development")
  31. def get_files_recurse(path: Path) -> Set:
  32. """Get all files recursively from given :param:`path`."""
  33. res = set()
  34. for p in path.rglob("*"):
  35. if p.is_dir():
  36. continue
  37. res.add(p)
  38. return res
  39. def get_paths_uniq_suffix(paths: Set[Path]) -> Set:
  40. """Get file suffix without dot in given :param:`paths`."""
  41. res = set()
  42. for path in paths:
  43. if path.suffix == "":
  44. log.warning("There is a path %s without suffix.", path)
  45. res.add(path.suffix[1:])
  46. return res
  47. def get_paths_rel_path(paths: Set[Path], rel: Path) -> Set:
  48. """Get files relative path to :param:`rel` with ``/`` prefix from given :param:`paths`."""
  49. return {f"/{path.relative_to(rel)}" for path in paths}
  50. def get_docs_img_path(paths: Set[Path]) -> Set:
  51. """Get all img syntax from given :param:`paths` using the regexp from :param:`pattern`."""
  52. res = set()
  53. pattern = re.compile(r"../img[\w./-]+")
  54. for path in paths:
  55. content = path.read_text()
  56. find = pattern.findall(content)
  57. if find:
  58. res |= {item.lstrip(".") for item in find}
  59. return res
  60. def del_rel_path(paths: Set[str]) -> None:
  61. """Delete all relative :param:`paths` from current root/docs directory."""
  62. for path in paths:
  63. log.debug("Deleting file in the path %s", path)
  64. root_dir.joinpath(path.lstrip("/")).unlink()
  65. def del_empty_dir_recurse(path: Path) -> None:
  66. """Delete all empty directory recursively from given :param:`paths`."""
  67. for p in path.rglob("*"):
  68. if p.is_dir() and not any(p.iterdir()):
  69. log.debug("Deleting directory in the path %s", p)
  70. p.rmdir()
  71. def diff_two_set(first: Set, second: Set) -> Tuple[set, set]:
  72. """Get two set difference tuple.
  73. :return: Tuple[(first - second), (second - first)]
  74. """
  75. return first.difference(second), second.difference(first)
  76. def check_diff_img() -> Tuple[set, set]:
  77. """Check images difference files.
  78. :return: Tuple[(in_docs - in_img_dir), (in_img_dir - in_docs)]
  79. """
  80. img = get_files_recurse(img_dir)
  81. docs = get_files_recurse(doc_dir)
  82. img_rel_path = get_paths_rel_path(img, root_dir)
  83. docs_rel_path = get_docs_img_path(docs)
  84. return diff_two_set(docs_rel_path, img_rel_path)
  85. def check() -> None:
  86. """Runner for `check` sub command."""
  87. img_docs, img_img = check_diff_img()
  88. assert not img_docs and not img_img, (
  89. f"Images assert failed: \n"
  90. f"* difference `docs` imgs to `img` is: {img_docs if img_docs else 'None'}\n"
  91. f"* difference `img` imgs to `docs` is: {img_img if img_img else 'None'}\n"
  92. )
  93. def prune() -> None:
  94. """Runner for `prune` sub command."""
  95. _, img_img = check_diff_img()
  96. del_rel_path(img_img)
  97. del_empty_dir_recurse(img_dir)
  98. def dev_syntax() -> None:
  99. """Check whether directory development contain do not support syntax or not.
  100. * It should not ref document from other document in `docs` directory
  101. """
  102. pattern = re.compile("(\\(\\.\\.[\\w./-]+\\.md\\))")
  103. dev_files_path = get_files_recurse(dev_en_dir) | get_files_recurse(dev_zh_dir)
  104. get_files_recurse(dev_en_dir)
  105. for path in dev_files_path:
  106. content = path.read_text()
  107. find = pattern.findall(content)
  108. assert not find, f"File {str(path)} contain temporary not support syntax: {find}."
  109. def build_argparse() -> argparse.ArgumentParser:
  110. """Build argparse.ArgumentParser with specific configuration."""
  111. parser = argparse.ArgumentParser(prog="img_utils")
  112. parser.add_argument(
  113. "-v",
  114. "--verbose",
  115. dest="log_level",
  116. action="store_const",
  117. const=logging.DEBUG,
  118. default=logging.INFO,
  119. help="Show verbose or not.",
  120. )
  121. subparsers = parser.add_subparsers(
  122. title="subcommands",
  123. dest="subcommand",
  124. help="Choose one of the subcommand you want to run.",
  125. )
  126. parser_check = subparsers.add_parser(
  127. "check", help="Check whether invalid or missing img exists."
  128. )
  129. parser_check.set_defaults(func=check)
  130. parser_prune = subparsers.add_parser(
  131. "prune", help="Remove img in directory `img` but not use in directory `docs`."
  132. )
  133. parser_prune.set_defaults(func=prune)
  134. parser_prune = subparsers.add_parser(
  135. "dev-syntax", help="Check whether temporary does not support syntax in development directory."
  136. )
  137. parser_prune.set_defaults(func=dev_syntax)
  138. # TODO Add subcommand `reorder`
  139. return parser
  140. if __name__ == "__main__":
  141. arg_parser = build_argparse()
  142. args = arg_parser.parse_args()
  143. # args = arg_parser.parse_args(["check"])
  144. log.setLevel(args.log_level)
  145. if args.log_level <= logging.DEBUG:
  146. print("All args is:", args)
  147. args.func()