img_utils.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. #!/usr/bin/env python
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing,
  13. # software distributed under the License is distributed on an
  14. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. # KIND, either express or implied. See the License for the
  16. # specific language governing permissions and limitations
  17. # under the License.
  18. """Utils for documentation's images."""
  19. import argparse
  20. import logging
  21. import re
  22. from pathlib import Path
  23. from typing import Set, Tuple
  24. log = logging.getLogger(__file__)
  25. log.addHandler(logging.StreamHandler())
  26. root_dir: Path = Path(__file__).parent
  27. img_dir: Path = root_dir.joinpath("img")
  28. doc_dir: Path = root_dir.joinpath("docs")
  29. dev_en_dir: Path = doc_dir.joinpath("en", "development")
  30. dev_zh_dir: Path = doc_dir.joinpath("zh", "development")
  31. def get_files_recurse(path: Path) -> Set:
  32. """Get all files recursively from given :param:`path`."""
  33. res = set()
  34. for p in path.rglob("*"):
  35. if p.is_dir():
  36. continue
  37. res.add(p)
  38. return res
  39. def get_paths_uniq_suffix(paths: Set[Path]) -> Set:
  40. """Get file suffix without dot in given :param:`paths`."""
  41. res = set()
  42. for path in paths:
  43. if path.suffix == "":
  44. log.warning("There is a path %s without suffix.", path)
  45. res.add(path.suffix[1:])
  46. return res
  47. def get_paths_rel_path(paths: Set[Path], rel: Path) -> Set:
  48. """Get files relative path to :param:`rel` with ``/`` prefix from given :param:`paths`."""
  49. return {f"/{path.relative_to(rel)}" for path in paths}
  50. def get_docs_img_path(paths: Set[Path]) -> Set:
  51. """Get all img syntax from given :param:`paths` using the regexp from :param:`pattern`."""
  52. res = set()
  53. pattern = re.compile(r"../img[\w./-]+")
  54. for path in paths:
  55. content = path.read_text()
  56. find = pattern.findall(content)
  57. if find:
  58. res |= {item.lstrip(".") for item in find}
  59. return res
  60. def del_rel_path(paths: Set[str]) -> None:
  61. """Delete all relative :param:`paths` from current root/docs directory."""
  62. for path in paths:
  63. log.debug("Deleting file in the path %s", path)
  64. root_dir.joinpath(path.lstrip("/")).unlink()
  65. def del_empty_dir_recurse(path: Path) -> None:
  66. """Delete all empty directory recursively from given :param:`paths`."""
  67. for p in path.rglob("*"):
  68. if p.is_dir() and not any(p.iterdir()):
  69. log.debug("Deleting directory in the path %s", p)
  70. p.rmdir()
  71. def diff_two_set(first: Set, second: Set) -> Tuple[set, set]:
  72. """Get two set difference tuple.
  73. :return: Tuple[(first - second), (second - first)]
  74. """
  75. return first.difference(second), second.difference(first)
  76. def check_diff_img() -> Tuple[set, set]:
  77. """Check images difference files.
  78. :return: Tuple[(in_docs - in_img_dir), (in_img_dir - in_docs)]
  79. """
  80. img = get_files_recurse(img_dir)
  81. docs = get_files_recurse(doc_dir)
  82. img_rel_path = get_paths_rel_path(img, root_dir)
  83. docs_rel_path = get_docs_img_path(docs)
  84. return diff_two_set(docs_rel_path, img_rel_path)
  85. def check() -> None:
  86. """Runner for `check` sub command."""
  87. img_docs, img_img = check_diff_img()
  88. assert not img_docs and not img_img, (
  89. f"Images assert failed: \n"
  90. f"* Some images use in documents but do not exists in `img` directory, please add them: "
  91. f"{img_docs if img_docs else 'None'}\n"
  92. f"* Some images not use in documents but exists in `img` directory, please delete them: "
  93. f"{img_img if img_img else 'None'}\n"
  94. )
  95. def prune() -> None:
  96. """Runner for `prune` sub command."""
  97. _, img_img = check_diff_img()
  98. del_rel_path(img_img)
  99. del_empty_dir_recurse(img_dir)
  100. def dev_syntax() -> None:
  101. """Check whether directory development contain do not support syntax or not.
  102. * It should not ref document from other document in `docs` directory
  103. """
  104. pattern = re.compile("(\\(\\.\\.[\\w./-]+\\.md\\))")
  105. dev_files_path = get_files_recurse(dev_en_dir) | get_files_recurse(dev_zh_dir)
  106. get_files_recurse(dev_en_dir)
  107. for path in dev_files_path:
  108. content = path.read_text()
  109. find = pattern.findall(content)
  110. assert (
  111. not find
  112. ), f"File {str(path)} contain temporary not support syntax: {find}."
  113. def build_argparse() -> argparse.ArgumentParser:
  114. """Build argparse.ArgumentParser with specific configuration."""
  115. parser = argparse.ArgumentParser(prog="img_utils")
  116. parser.add_argument(
  117. "-v",
  118. "--verbose",
  119. dest="log_level",
  120. action="store_const",
  121. const=logging.DEBUG,
  122. default=logging.INFO,
  123. help="Show verbose or not.",
  124. )
  125. subparsers = parser.add_subparsers(
  126. title="subcommands",
  127. dest="subcommand",
  128. help="Choose one of the subcommand you want to run.",
  129. )
  130. parser_check = subparsers.add_parser(
  131. "check", help="Check whether invalid or missing img exists."
  132. )
  133. parser_check.set_defaults(func=check)
  134. parser_prune = subparsers.add_parser(
  135. "prune", help="Remove img in directory `img` but not use in directory `docs`."
  136. )
  137. parser_prune.set_defaults(func=prune)
  138. parser_prune = subparsers.add_parser(
  139. "dev-syntax",
  140. help="Check whether temporary does not support syntax in development directory.",
  141. )
  142. parser_prune.set_defaults(func=dev_syntax)
  143. # TODO Add subcommand `reorder`
  144. return parser
  145. if __name__ == "__main__":
  146. arg_parser = build_argparse()
  147. args = arg_parser.parse_args()
  148. # args = arg_parser.parse_args(["check"])
  149. log.setLevel(args.log_level)
  150. if args.log_level <= logging.DEBUG:
  151. print("All args is:", args)
  152. args.func()