img_utils.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. #!/usr/bin/env python
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing,
  13. # software distributed under the License is distributed on an
  14. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. # KIND, either express or implied. See the License for the
  16. # specific language governing permissions and limitations
  17. # under the License.
  18. """Utils for documentation's images."""
  19. import argparse
  20. import logging
  21. import re
  22. from pathlib import Path
  23. from typing import Set, Tuple
  # Module-level logger; a stream handler is attached so messages are emitted
  # even when the hosting application has not configured logging.
  log = logging.getLogger(__file__)
  log.addHandler(logging.StreamHandler())

  # Directory layout, all relative to the directory containing this file:
  # images live in ``<root>/img`` and documents in ``<root>/docs``.
  root_dir: Path = Path(__file__).parent
  img_dir: Path = root_dir / "img"
  doc_dir: Path = root_dir / "docs"
  def get_files_recurse(path: Path) -> Set:
      """Collect every non-directory entry found below :param:`path`.

      The tree is walked recursively with ``Path.rglob("*")``; directories
      themselves are skipped, so the result only holds the entries inside them.

      :return: Set of ``Path`` objects, empty when nothing is found.
      """
      return {entry for entry in path.rglob("*") if not entry.is_dir()}
  def get_paths_uniq_suffix(paths: Set[Path]) -> Set:
      """Return the distinct suffixes, leading dot removed, of all :param:`paths`.

      A path without a suffix is reported with a warning and contributes an
      empty string to the result.
      """
      suffixes = set()
      for item in paths:
          suffix = item.suffix
          if not suffix:
              log.warning("There is a path %s without suffix.", item)
          # Slicing drops the leading dot; an empty suffix stays empty.
          suffixes.add(suffix[1:])
      return suffixes
  def get_paths_rel_path(paths: Set[Path], rel: Path) -> Set:
      """Express each of :param:`paths` relative to :param:`rel`, prefixed with ``/``.

      :return: Set of strings such as ``/img/demo.png``.
      """
      rel_paths = set()
      for item in paths:
          rel_paths.add(f"/{item.relative_to(rel)}")
      return rel_paths
  def get_docs_img_path(paths: Set[Path]) -> Set:
      """Collect every image reference found in the files of :param:`paths`.

      A reference is any substring made of ``/img`` followed by one or more
      word characters, dots, slashes or dashes, e.g. ``/img/arch/demo.png``.

      Files are decoded as UTF-8 explicitly instead of relying on the locale's
      default encoding, so documents with non-ASCII text are read the same way
      on every platform.

      :return: Set of the matched reference strings, empty when none is found.
      """
      pattern = re.compile(r"/img[\w./-]+")
      res = set()
      for path in paths:
          content = path.read_text(encoding="utf-8")
          # ``findall`` returns an empty list when nothing matches, so no guard
          # is needed before merging.
          res.update(pattern.findall(content))
      return res
  def del_rel_path(paths: Set[str]) -> None:
      """Remove every file named in :param:`paths`, resolved against ``root_dir``.

      Leading ``/`` characters are stripped from each entry before it is joined
      to ``root_dir``.
      """
      for rel_path in paths:
          log.debug("Deleting file in the path %s", rel_path)
          target = root_dir.joinpath(rel_path.lstrip("/"))
          target.unlink()
  def del_empty_dir_recurse(path: Path) -> None:
      """Delete every empty directory below :param:`path`, deepest first.

      Directories are collected up front and handled from the deepest level
      upwards, so a directory that only becomes empty once its empty children
      are gone is removed in the same call. :param:`path` itself is never
      removed.
      """
      # Snapshot first: the tree must not change while ``rglob`` is still
      # walking it.
      directories = [entry for entry in path.rglob("*") if entry.is_dir()]
      # More path parts means deeper nesting; children must go before parents.
      directories.sort(key=lambda entry: len(entry.parts), reverse=True)
      for directory in directories:
          if not any(directory.iterdir()):
              log.debug("Deleting directory in the path %s", directory)
              directory.rmdir()
  def diff_two_set(first: Set, second: Set) -> Tuple[set, set]:
      """Compute both one-sided differences of two sets.

      :return: Tuple[(first - second), (second - first)]
      """
      only_in_first = first.difference(second)
      only_in_second = second.difference(first)
      return only_in_first, only_in_second
  def check_diff_img() -> Tuple[set, set]:
      """Compare image references in the docs with the files under ``img_dir``.

      :return: Tuple[(in_docs - in_img_dir), (in_img_dir - in_docs)]
      """
      # Image paths referenced from any file below ``doc_dir``.
      referenced = get_docs_img_path(get_files_recurse(doc_dir))
      # Image files on disk, written relative to ``root_dir`` with a ``/`` prefix
      # so they are comparable with the references above.
      stored = get_paths_rel_path(get_files_recurse(img_dir), root_dir)
      return diff_two_set(referenced, stored)
  def check() -> None:
      """Runner for `check` sub command.

      :raises AssertionError: when the docs reference an image missing from the
          ``img`` directory, or the ``img`` directory holds an image no doc
          references.
      """
      img_docs, img_img = check_diff_img()
      # Raise explicitly instead of using an ``assert`` statement: asserts are
      # removed under ``python -O``, which would make this check always pass.
      if img_docs or img_img:
          raise AssertionError(
              "Images assert failed: \n"
              f"* difference `docs` imgs to `img` is: {img_docs if img_docs else 'None'}\n"
              f"* difference `img` imgs to `docs` is: {img_img if img_img else 'None'}\n"
          )
  def prune() -> None:
      """Runner for `prune` sub command.

      Deletes the images found under ``img_dir`` that no doc references, then
      removes the directories left empty below ``img_dir``.
      """
      unused_imgs = check_diff_img()[1]
      del_rel_path(unused_imgs)
      del_empty_dir_recurse(img_dir)
  def build_argparse() -> argparse.ArgumentParser:
      """Create the command line parser of ``img_utils``.

      The parser offers a global ``-v``/``--verbose`` switch, which stores
      ``logging.DEBUG`` (default ``logging.INFO``) in ``log_level``, and the
      subcommands ``check`` and ``prune``. Each subcommand stores its runner in
      ``func``.
      """
      parser = argparse.ArgumentParser(prog="img_utils")
      verbose_options = {
          "dest": "log_level",
          "action": "store_const",
          "const": logging.DEBUG,
          "default": logging.INFO,
          "help": "Show verbose or not.",
      }
      parser.add_argument("-v", "--verbose", **verbose_options)

      subparsers = parser.add_subparsers(
          title="subcommands",
          dest="subcommand",
          help="Choose one of the subcommand you want to run.",
      )
      # (name, help text, runner) of every subcommand, in registration order.
      subcommands = (
          ("check", "Check whether invalid or missing img exists.", check),
          ("prune", "Remove img in directory `img` but not use in directory `docs`.", prune),
      )
      for name, help_text, runner in subcommands:
          subparsers.add_parser(name, help=help_text).set_defaults(func=runner)

      # TODO Add subcommand `reorder`
      return parser
  if __name__ == "__main__":
      # Script entry point: parse the command line, configure the log level and
      # run the selected subcommand.
      arg_parser = build_argparse()
      args = arg_parser.parse_args()

      log.setLevel(args.log_level)
      if args.log_level <= logging.DEBUG:
          print("All args is:", args)

      # ``func`` is only set by a subcommand; without one, report a usage error
      # instead of failing with an ``AttributeError`` on ``args.func``.
      if not hasattr(args, "func"):
          arg_parser.error("a subcommand is required, choose from `check` or `prune`")
      args.func()